diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md
index 90fa6bfb2cb..f5b9399a240 100644
--- a/docs/IE_DG/InferenceEngine_QueryAPI.md
+++ b/docs/IE_DG/InferenceEngine_QueryAPI.md
@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:
 
 @snippet snippets/InferenceEngine_QueryAPI4.cpp part4
 
+The code below demonstrates how to get memory statistics of `GPU` device:
+
+@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
+
 ### GetConfig()
 
 The method is used to get information about configuration values the executable network has been created with:
diff --git a/docs/snippets/InferenceEngine_QueryAPI6.cpp b/docs/snippets/InferenceEngine_QueryAPI6.cpp
new file mode 100644
index 00000000000..b13812107bf
--- /dev/null
+++ b/docs/snippets/InferenceEngine_QueryAPI6.cpp
@@ -0,0 +1,12 @@
+#include <ie_core.hpp>
+
+int main() {
+using namespace InferenceEngine;
+//! [part6]
+InferenceEngine::Core core;
+auto network = core.ReadNetwork("sample.xml");
+auto exeNetwork = core.LoadNetwork(network, "GPU");
+std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
+//! [part6]
+return 0;
+}
diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
index be871ed5ab2..31c31ac2e6f 100644
--- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
         metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
         metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
             nr *= 2;
         IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
+    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
+        std::map<std::string, uint64_t> statistics;
+        if (m_context != nullptr) {
+            auto impl = getContextImpl(m_context);
+            impl->acquire_lock();
+            std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
+            eng->get_memory_statistics(&statistics);
+            impl->release_lock();
+        }
+        IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
     } else {
         IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
     }
diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
index 003af2bb689..5a9dc1f0628 100644
--- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
+++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
@@ -45,6 +45,12 @@ DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
  */
 DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
 
+/**
+ * @brief Metric to get statistics of GPU memory allocated by engine for each allocation type
+ * It contains information about both current and peak memory usage
+ */
+DECLARE_GPU_METRIC_KEY(MEMORY_STATISTICS, std::map<std::string, uint64_t>);
+
 /**
  * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
  * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
index a260d3a28d3..b4957b704c6 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
@@ -166,6 +166,28 @@ INSTANTIATE_TEST_SUITE_P(
 // Executable Network GetMetric
 //
 
+using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = IEClassBaseTestP;
+TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    Core ie;
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleNetwork, deviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)));
+    std::map<std::string, uint64_t> t = p;
+
+    std::cout << "Memory Statistics: " << std::endl;
+    for (auto &&kv : t) {
+        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+        nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS,
+        ::testing::Values("GPU")
+);
+
 INSTANTIATE_TEST_SUITE_P(
         nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
         ::testing::Values("GPU", "MULTI:GPU", "HETERO:GPU", "AUTO:GPU,CPU")
diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
index acec6f58e58..6f9ebf75e41 100644
--- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
@@ -109,6 +109,10 @@ public:
     /// Returns the amount of GPU memory specified allocation @p type that currently used by the engine
     uint64_t get_used_device_memory(allocation_type type) const;
 
+    /// Returns statistics of GPU memory allocated by engine in current process for all allocation types.
+    /// @note It contains information about both current and peak memory usage
+    void get_memory_statistics(std::map<std::string, uint64_t>* statistics) const;
+
     /// Adds @p bytes count to currently used memory size of the specified allocation @p type
     void add_memory_used(uint64_t bytes, allocation_type type);
 
@@ -153,9 +157,10 @@ protected:
     engine(const device::ptr device, const engine_configuration& configuration);
     const device::ptr _device;
     engine_configuration _configuration;
+    mutable std::mutex _mutex;
 
-    std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
-    std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map;
 };
 
 }  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp
index 3738ec2ae9d..e0650bc161b 100644
--- a/inference-engine/thirdparty/clDNN/runtime/engine.cpp
+++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp
@@ -120,48 +120,64 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint32_t plane) {
 #endif  // _WIN32
 
 uint64_t engine::get_max_used_device_memory() const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t total_peak_memory_usage {0};
-    for (auto const& m : peak_memory_usage_map) {
+    for (auto const& m : _peak_memory_usage_map) {
         total_peak_memory_usage += m.second.load();
     }
     return total_peak_memory_usage;
 }
 
 uint64_t engine::get_max_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t peak_memory_usage {0};
-    auto iter = peak_memory_usage_map.find(type);
-    if (iter != peak_memory_usage_map.end()) {
+    auto iter = _peak_memory_usage_map.find(type);
+    if (iter != _peak_memory_usage_map.end()) {
         peak_memory_usage = iter->second.load();
     }
     return peak_memory_usage;
 }
 
 uint64_t engine::get_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t memory_usage {0};
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
         memory_usage = iter->second.load();
     }
     return memory_usage;
 }
 
-void engine::add_memory_used(size_t bytes, allocation_type type) {
-    if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
-        static std::mutex m;
-        std::lock_guard<std::mutex> guard(m);
-        memory_usage_map[type] = 0;
-        peak_memory_usage_map[type] = 0;
+void engine::get_memory_statistics(std::map<std::string, uint64_t>* statistics) const {
+    for (auto const& m : _memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_current";
+        (*statistics)[oss.str()] = m.second.load();
     }
-    memory_usage_map[type] += bytes;
-    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
-        peak_memory_usage_map[type] = memory_usage_map[type].load();
+    for (auto const& m : _peak_memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_peak";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+}
+
+void engine::add_memory_used(size_t bytes, allocation_type type) {
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) {
+        _memory_usage_map[type] = 0;
+        _peak_memory_usage_map[type] = 0;
+    }
+    _memory_usage_map[type] += bytes;
+    if (_memory_usage_map[type] > _peak_memory_usage_map[type]) {
+        _peak_memory_usage_map[type] = _memory_usage_map[type].load();
     }
 }
 
 void engine::subtract_memory_used(size_t bytes, allocation_type type) {
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
-        memory_usage_map[type] -= bytes;
+    std::lock_guard<std::mutex> guard(_mutex);
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
+        _memory_usage_map[type] -= bytes;
     } else {
         throw std::runtime_error("Attempt to free unallocated memory");
     }
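
Taken together, the documentation snippet and the behavior test above show the intended client-side usage of the new metric. The following is a minimal standalone sketch, not part of the patch: it assumes the public headers <ie_core.hpp> and <gpu/gpu_config.hpp> from this include layout, and a hypothetical model file "sample.xml".

// Standalone usage sketch (illustrative only): query the MEMORY_STATISTICS metric
// introduced by this change and print the per-allocation-type counters.
#include <ie_core.hpp>
#include <gpu/gpu_config.hpp>

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("sample.xml");       // hypothetical model path
    auto exeNetwork = core.LoadNetwork(network, "GPU");

    // Keys are formed as "<allocation_type>_current" and "<allocation_type>_peak",
    // values are sizes in bytes (see engine::get_memory_statistics above).
    std::map<std::string, uint64_t> statistics =
        exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));

    for (const auto& kv : statistics) {
        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
    }
    return 0;
}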