[GPU] Add GPU plugin metric to get statistics of GPU memory allocated by engine (#7758)

Andrew Kwangwoong Park 2021-10-13 11:40:51 +09:00 committed by GitHub
parent 9e1231ac8d
commit 4e2cc3e370
7 changed files with 95 additions and 19 deletions


@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:
 @snippet snippets/InferenceEngine_QueryAPI4.cpp part4
+The code below demonstrates how to get memory statistics of the `GPU` device:
+@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
 ### GetConfig()
 The method is used to get information about configuration values the executable network has been created with:


@@ -0,0 +1,12 @@
+#include <ie_core.hpp>
+
+int main() {
+using namespace InferenceEngine;
+//! [part6]
+InferenceEngine::Core core;
+auto network = core.ReadNetwork("sample.xml");
+auto exeNetwork = core.LoadNetwork(network, "GPU");
+std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
+//! [part6]
+return 0;
+}
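
As a side note, here is a minimal sketch of how the returned map can be consumed. The explicit include of gpu/gpu_config.hpp for GPU_METRIC_KEY and the "<allocation_type>_current" / "<allocation_type>_peak" key naming are assumptions taken from the engine changes further down; "sample.xml" is a placeholder model path.

#include <gpu/gpu_config.hpp>  // assumed home of GPU_METRIC_KEY (declared in the gpu config header below)
#include <ie_core.hpp>
#include <cstdint>
#include <iostream>
#include <map>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("sample.xml");  // placeholder model path
    auto exeNetwork = core.LoadNetwork(network, "GPU");
    std::map<std::string, uint64_t> statistics_map =
        exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
    // Keys pair an allocation type with a "_current" or "_peak" suffix,
    // e.g. "usm_host_current" (naming taken from engine::get_memory_statistics below).
    for (const auto& kv : statistics_map) {
        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
    }
    return 0;
}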


@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
         metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
         metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
             nr *= 2;
         IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
+    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
+        std::map<std::string, uint64_t> statistics;
+        if (m_context != nullptr) {
+            auto impl = getContextImpl(m_context);
+            impl->acquire_lock();
+            std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
+            eng->get_memory_statistics(&statistics);
+            impl->release_lock();
+        }
+        IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
     } else {
         IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
     }
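
One design note on the branch above: acquire_lock() and release_lock() are called manually, so the lock would not be released if get_memory_statistics() threw. A hypothetical RAII wrapper over the same pair of calls, not part of this change (ContextImpl stands in for whatever getContextImpl() returns):

// Hypothetical RAII guard for the acquire_lock()/release_lock() pair above.
// ContextImpl stands in for the type returned by getContextImpl(m_context).
template <typename ContextImpl>
class context_lock_guard {
public:
    explicit context_lock_guard(ContextImpl& impl) : m_impl(impl) { m_impl.acquire_lock(); }
    ~context_lock_guard() { m_impl.release_lock(); }  // runs even during stack unwinding
    context_lock_guard(const context_lock_guard&) = delete;
    context_lock_guard& operator=(const context_lock_guard&) = delete;
private:
    ContextImpl& m_impl;
};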


@@ -45,6 +45,12 @@ DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
 */
 DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
+/**
+ * @brief Metric to get statistics of GPU memory allocated by the engine for each allocation type.
+ * It contains information about both current and peak memory usage.
+ */
+DECLARE_GPU_METRIC_KEY(MEMORY_STATISTICS, std::map<std::string, uint64_t>);
+
 /**
  * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
  * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication


@@ -166,6 +166,28 @@ INSTANTIATE_TEST_SUITE_P(
 // Executable Network GetMetric
 //
+using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = IEClassBaseTestP;
+
+TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    Core ie;
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleNetwork, deviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)));
+    std::map<std::string, uint64_t> t = p;
+
+    std::cout << "Memory Statistics: " << std::endl;
+    for (auto &&kv : t) {
+        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS,
+    ::testing::Values("GPU")
+);
+
 INSTANTIATE_TEST_SUITE_P(
     nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
     ::testing::Values("GPU", "MULTI:GPU", "HETERO:GPU", "AUTO:GPU,CPU")


@@ -109,6 +109,10 @@ public:
     /// Returns the amount of GPU memory of the specified allocation @p type that is currently used by the engine
     uint64_t get_used_device_memory(allocation_type type) const;

+    /// Returns statistics of GPU memory allocated by the engine in the current process for all allocation types.
+    /// @note It contains information about both current and peak memory usage.
+    void get_memory_statistics(std::map<std::string, uint64_t>* statistics) const;
+
     /// Adds @p bytes count to the currently used memory size of the specified allocation @p type
     void add_memory_used(uint64_t bytes, allocation_type type);

@@ -153,9 +157,10 @@ protected:
     engine(const device::ptr device, const engine_configuration& configuration);

     const device::ptr _device;
     engine_configuration _configuration;

-    std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
-    std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
+    mutable std::mutex _mutex;
+    std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map;
 };

 } // namespace cldnn
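
For plugin-internal callers, here is a small sketch of using this API directly. The Engine template parameter stands in for cldnn::engine so the fragment compiles standalone:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Sketch: dump an engine's per-allocation-type memory statistics.
// Engine stands in for cldnn::engine (taken as a template parameter
// so this example does not depend on clDNN headers).
template <typename Engine>
void dump_memory_statistics(const Engine& eng) {
    std::map<std::string, uint64_t> statistics;
    eng.get_memory_statistics(&statistics);  // fills "<type>_current" / "<type>_peak" entries
    for (const auto& kv : statistics) {
        std::cout << kv.first << " = " << kv.second << " bytes\n";
    }
}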
} // namespace cldnn } // namespace cldnn


@@ -120,48 +120,64 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
 #endif  // _WIN32

 uint64_t engine::get_max_used_device_memory() const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t total_peak_memory_usage {0};
-    for (auto const& m : peak_memory_usage_map) {
+    for (auto const& m : _peak_memory_usage_map) {
         total_peak_memory_usage += m.second.load();
     }
     return total_peak_memory_usage;
 }

 uint64_t engine::get_max_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t peak_memory_usage {0};
-    auto iter = peak_memory_usage_map.find(type);
-    if (iter != peak_memory_usage_map.end()) {
+    auto iter = _peak_memory_usage_map.find(type);
+    if (iter != _peak_memory_usage_map.end()) {
         peak_memory_usage = iter->second.load();
     }
     return peak_memory_usage;
 }

 uint64_t engine::get_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t memory_usage {0};
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
         memory_usage = iter->second.load();
     }
     return memory_usage;
 }

+void engine::get_memory_statistics(std::map<std::string, uint64_t>* statistics) const {
+    for (auto const& m : _memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_current";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+    for (auto const& m : _peak_memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_peak";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+}
+
 void engine::add_memory_used(size_t bytes, allocation_type type) {
-    if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
-        static std::mutex m;
-        std::lock_guard<std::mutex> guard(m);
-        memory_usage_map[type] = 0;
-        peak_memory_usage_map[type] = 0;
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) {
+        _memory_usage_map[type] = 0;
+        _peak_memory_usage_map[type] = 0;
     }
-    memory_usage_map[type] += bytes;
-    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
-        peak_memory_usage_map[type] = memory_usage_map[type].load();
+    _memory_usage_map[type] += bytes;
+    if (_memory_usage_map[type] > _peak_memory_usage_map[type]) {
+        _peak_memory_usage_map[type] = _memory_usage_map[type].load();
     }
 }

 void engine::subtract_memory_used(size_t bytes, allocation_type type) {
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
-        memory_usage_map[type] -= bytes;
+    std::lock_guard<std::mutex> guard(_mutex);
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
+        _memory_usage_map[type] -= bytes;
     } else {
         throw std::runtime_error("Attempt to free unallocated memory");
     }
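
Taken together, the scheme above keeps the per-type counters atomic while the new engine-level _mutex serializes both the first-time insertion of a type into the maps and the read-modify-write that updates the peak. A self-contained sketch of the same technique follows; the allocation_type enum and its names are stand-ins for the clDNN ones, and unlike the patch the sketch also holds the lock while snapshotting, so iteration cannot race a first-time insertion in add():

#include <atomic>
#include <cstdint>
#include <map>
#include <mutex>
#include <stdexcept>
#include <string>

// Stand-in for cldnn::allocation_type.
enum class allocation_type { cl_mem, usm_host, usm_device };

static std::string name_of(allocation_type t) {
    switch (t) {
        case allocation_type::cl_mem:     return "cl_mem";
        case allocation_type::usm_host:   return "usm_host";
        case allocation_type::usm_device: return "usm_device";
    }
    return "unknown";
}

class memory_tracker {
public:
    void add(uint64_t bytes, allocation_type type) {
        std::lock_guard<std::mutex> guard(_mutex);
        _current[type] += bytes;                 // atomic fetch-add
        if (_current[type] > _peak[type])        // peak update guarded by _mutex
            _peak[type] = _current[type].load();
    }
    void subtract(uint64_t bytes, allocation_type type) {
        std::lock_guard<std::mutex> guard(_mutex);
        auto iter = _current.find(type);
        if (iter == _current.end())
            throw std::runtime_error("Attempt to free unallocated memory");
        iter->second -= bytes;
    }
    // Snapshot as "<type>_current" / "<type>_peak" entries, mirroring
    // engine::get_memory_statistics above.
    std::map<std::string, uint64_t> statistics() const {
        std::lock_guard<std::mutex> guard(_mutex);
        std::map<std::string, uint64_t> out;
        for (auto const& m : _current)
            out[name_of(m.first) + "_current"] = m.second.load();
        for (auto const& m : _peak)
            out[name_of(m.first) + "_peak"] = m.second.load();
        return out;
    }
private:
    mutable std::mutex _mutex;
    std::map<allocation_type, std::atomic<uint64_t>> _current;
    std::map<allocation_type, std::atomic<uint64_t>> _peak;
};

The sketch drops the patch's explicit zero-initialization of newly seen types because std::map::operator[] already value-initializes the atomic counter to zero.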