[GPU] Add GPU plugin metric to get statistics of GPU memory allocated by engine (#7758)
This commit is contained in: parent 9e1231ac8d · commit 4e2cc3e370
@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:

 @snippet snippets/InferenceEngine_QueryAPI4.cpp part4

+The code below demonstrates how to get memory statistics of the `GPU` device:
+
+@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
+
 ### GetConfig()

 The method is used to get information about configuration values the executable network has been created with:
docs/snippets/InferenceEngine_QueryAPI6.cpp (new file, 12 additions)

@@ -0,0 +1,12 @@
+#include <ie_core.hpp>
+
+int main() {
+using namespace InferenceEngine;
+//! [part6]
+InferenceEngine::Core core;
+auto network = core.ReadNetwork("sample.xml");
+auto exeNetwork = core.LoadNetwork(network, "GPU");
+std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
+//! [part6]
+return 0;
+}
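For readers experimenting outside the docs build, a minimal self-contained sketch of consuming this metric follows. The `<gpu/gpu_config.hpp>` include (which declares `GPU_METRIC_KEY`) and the "sample.xml" model path are assumptions; the key names in the returned map come from the engine changes later in this commit.

// Sketch only: load a network on GPU and print each MEMORY_STATISTICS entry.
#include <gpu/gpu_config.hpp>  // assumed location of GPU_METRIC_KEY
#include <ie_core.hpp>

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("sample.xml");  // placeholder model path
    auto exeNetwork = core.LoadNetwork(network, "GPU");

    std::map<std::string, uint64_t> stats =
        exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));

    for (const auto& kv : stats) {
        std::cout << kv.first << ": " << kv.second << " bytes\n";
    }
    return 0;
}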
@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
         metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
         metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
             nr *= 2;
         IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
+    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
+        std::map<std::string, uint64_t> statistics;
+        if (m_context != nullptr) {
+            auto impl = getContextImpl(m_context);
+            impl->acquire_lock();
+            std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
+            eng->get_memory_statistics(&statistics);
+            impl->release_lock();
+        }
+        IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
     } else {
         IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
     }
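One note on the shape of the new branch: `acquire_lock()`/`release_lock()` are paired manually, so an exception thrown between them would leave the context locked. A hypothetical scope guard (not part of this commit; names invented) sketches how the same call could be made exception-safe:

// Hypothetical helper, not in this commit: RAII wrapper over the context-impl
// lock so release_lock() runs even if the guarded call throws.
template <typename Impl>
class ContextLock {
public:
    explicit ContextLock(Impl* impl) : impl_(impl) { impl_->acquire_lock(); }
    ~ContextLock() { impl_->release_lock(); }
    ContextLock(const ContextLock&) = delete;
    ContextLock& operator=(const ContextLock&) = delete;
private:
    Impl* impl_;
};

// The metric branch could then read (C++17 class template argument deduction):
//     ContextLock lock(impl);
//     impl->GetEngine()->get_memory_statistics(&statistics);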
@@ -45,6 +45,12 @@ DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
 */
 DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);

+/**
+ * @brief Metric to get statistics of GPU memory allocated by the engine for each allocation type
+ * It contains information about both current and peak memory usage
+ */
+DECLARE_GPU_METRIC_KEY(MEMORY_STATISTICS, std::map<std::string, uint64_t>);
+
 /**
  * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
  * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
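For orientation, the value type declared here is the map the plugin returns; its keys are formed later in this commit as "<allocation_type>_current" and "<allocation_type>_peak". A hypothetical example of the contents (the allocation-type spellings depend on how `cldnn::allocation_type` values are streamed to text, and the numbers are invented):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
    // Illustrative only: key spellings and byte counts are assumptions.
    const std::map<std::string, uint64_t> example_statistics = {
        {"usm_device_current", 104857600},  // bytes currently allocated on the device
        {"usm_device_peak",    209715200},  // peak device usage since engine creation
        {"usm_host_current",     4194304},
        {"usm_host_peak",        8388608},
    };
    for (const auto& kv : example_statistics)
        std::cout << kv.first << " = " << kv.second << '\n';
    return 0;
}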
@@ -166,6 +166,28 @@ INSTANTIATE_TEST_SUITE_P(
 // Executable Network GetMetric
 //

+using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = IEClassBaseTestP;
+TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    Core ie;
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleNetwork, deviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)));
+    std::map<std::string, uint64_t> t = p;
+
+    std::cout << "Memory Statistics: " << std::endl;
+    for (auto &&kv : t) {
+        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS,
+    ::testing::Values("GPU")
+);
+
 INSTANTIATE_TEST_SUITE_P(
     nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
     ::testing::Values("GPU", "MULTI:GPU", "HETERO:GPU", "AUTO:GPU,CPU")
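To run only the new test, the standard googletest filter flag applies; the binary name below is a placeholder, since the actual GPU functional-test target name depends on the build:

./gpuFuncTests --gtest_filter=*GPU_MEMORY_STATISTICS*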
@@ -109,6 +109,10 @@ public:
     /// Returns the amount of GPU memory of the specified allocation @p type that is currently used by the engine
     uint64_t get_used_device_memory(allocation_type type) const;

+    /// Returns statistics of GPU memory allocated by the engine in the current process for all allocation types.
+    /// @note It contains information about both current and peak memory usage
+    void get_memory_statistics(std::map<std::string, uint64_t>* statistics) const;
+
     /// Adds @p bytes count to currently used memory size of the specified allocation @p type
     void add_memory_used(uint64_t bytes, allocation_type type);
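The new declaration fills a caller-provided map (an out-parameter) rather than returning one. A minimal call-site sketch, where `eng` is assumed to be a valid `std::shared_ptr<cldnn::engine>` as in the plugin code earlier in this commit:

// Sketch: `eng` is an existing std::shared_ptr<cldnn::engine> (assumed available).
std::map<std::string, uint64_t> statistics;
eng->get_memory_statistics(&statistics);  // fills "<type>_current" / "<type>_peak" entries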
@@ -153,9 +157,10 @@ protected:
     engine(const device::ptr device, const engine_configuration& configuration);
     const device::ptr _device;
     engine_configuration _configuration;
+    mutable std::mutex _mutex;

-    std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
-    std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map;
 };

 } // namespace cldnn
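The protected members pair a `mutable` mutex with maps of atomic counters: the mutex serializes structural map operations (insertion and iteration, which `std::map` does not make thread-safe), the atomics keep individual counter updates well-defined, and `mutable` lets const getters take the lock. A standalone sketch of the same pattern, with invented names and nothing taken from cldnn:

#include <atomic>
#include <cstdint>
#include <map>
#include <mutex>

// Stand-in for cldnn::allocation_type; names invented for the sketch.
enum class alloc_kind { host, device };

class usage_tracker {
public:
    void add(alloc_kind kind, uint64_t bytes) {
        std::lock_guard<std::mutex> guard(mutex_);
        auto& cur = current_[kind];   // operator[] value-initializes new counters to 0
        auto& peak = peak_[kind];
        cur += bytes;
        if (cur > peak)
            peak = cur.load();        // peak only ever grows
    }
    uint64_t current(alloc_kind kind) const {
        std::lock_guard<std::mutex> guard(mutex_);  // mutable: lockable from const
        auto it = current_.find(kind);
        return it == current_.end() ? 0 : it->second.load();
    }
private:
    mutable std::mutex mutex_;
    std::map<alloc_kind, std::atomic<uint64_t>> current_;
    std::map<alloc_kind, std::atomic<uint64_t>> peak_;
};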
@@ -120,48 +120,64 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
 #endif // _WIN32

 uint64_t engine::get_max_used_device_memory() const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t total_peak_memory_usage {0};
-    for (auto const& m : peak_memory_usage_map) {
+    for (auto const& m : _peak_memory_usage_map) {
         total_peak_memory_usage += m.second.load();
     }
     return total_peak_memory_usage;
 }

 uint64_t engine::get_max_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t peak_memory_usage {0};
-    auto iter = peak_memory_usage_map.find(type);
-    if (iter != peak_memory_usage_map.end()) {
+    auto iter = _peak_memory_usage_map.find(type);
+    if (iter != _peak_memory_usage_map.end()) {
         peak_memory_usage = iter->second.load();
     }
     return peak_memory_usage;
 }

 uint64_t engine::get_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t memory_usage {0};
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
         memory_usage = iter->second.load();
     }
     return memory_usage;
 }

-void engine::add_memory_used(size_t bytes, allocation_type type) {
-    if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
-        static std::mutex m;
-        std::lock_guard<std::mutex> guard(m);
-        memory_usage_map[type] = 0;
-        peak_memory_usage_map[type] = 0;
+void engine::get_memory_statistics(std::map<std::string, uint64_t>* statistics) const {
+    for (auto const& m : _memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_current";
+        (*statistics)[oss.str()] = m.second.load();
     }
-    memory_usage_map[type] += bytes;
-    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
-        peak_memory_usage_map[type] = memory_usage_map[type].load();
+    for (auto const& m : _peak_memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_peak";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+}
+
+void engine::add_memory_used(size_t bytes, allocation_type type) {
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) {
+        _memory_usage_map[type] = 0;
+        _peak_memory_usage_map[type] = 0;
+    }
+    _memory_usage_map[type] += bytes;
+    if (_memory_usage_map[type] > _peak_memory_usage_map[type]) {
+        _peak_memory_usage_map[type] = _memory_usage_map[type].load();
     }
 }

 void engine::subtract_memory_used(size_t bytes, allocation_type type) {
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
-        memory_usage_map[type] -= bytes;
+    std::lock_guard<std::mutex> guard(_mutex);
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
+        _memory_usage_map[type] -= bytes;
     } else {
         throw std::runtime_error("Attempt to free unallocated memory");
     }
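To make the current-versus-peak bookkeeping concrete, a short worked sequence for one allocation type (invented numbers; note also that `get_memory_statistics()` as written reads the maps without taking `_mutex`, relying on the atomics for the individual loads):

add_memory_used(100, type)        -> current = 100, peak = 100
add_memory_used(50, type)         -> current = 150, peak = 150
subtract_memory_used(30, type)    -> current = 120, peak stays 150

get_memory_statistics() would then report `<type>_current` = 120 and `<type>_peak` = 150.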