From 9972410aa39977c0f7ab6edfc7da1863532a9e73 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 18 Nov 2021 20:26:53 +0900 Subject: [PATCH] [GPU] Add IE Core GPU plugin metric to query overall memory statistics for GPU device (#8421) --- docs/IE_DG/InferenceEngine_QueryAPI.md | 4 - docs/IE_DG/supported_plugins/GPU.md | 5 + ...ceEngine_QueryAPI6.cpp => GPU_Metric0.cpp} | 6 +- .../src/cldnn_engine/cldnn_engine.cpp | 38 +++- .../src/cldnn_engine/cldnn_engine.h | 5 +- .../cldnn_engine/cldnn_executable_network.cpp | 11 - .../behavior/plugin/core_integration.cpp | 204 ++++++++++++++++-- .../behavior/plugin/core_integration.hpp | 1 + .../clDNN/api/cldnn/runtime/engine.hpp | 2 +- .../thirdparty/clDNN/runtime/engine.cpp | 8 +- 10 files changed, 239 insertions(+), 45 deletions(-) rename docs/snippets/{InferenceEngine_QueryAPI6.cpp => GPU_Metric0.cpp} (61%) diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md index 23360cc62ab..34579924bad 100644 --- a/docs/IE_DG/InferenceEngine_QueryAPI.md +++ b/docs/IE_DG/InferenceEngine_QueryAPI.md @@ -71,10 +71,6 @@ Or the current temperature of `MYRIAD` device: @snippet snippets/InferenceEngine_QueryAPI4.cpp part4 -The code below demonstrates how to get memory statistics of `GPU` device: - -@snippet snippets/InferenceEngine_QueryAPI6.cpp part6 - ### GetConfig() The method is used to get information about configuration values the executable network has been created with: diff --git a/docs/IE_DG/supported_plugins/GPU.md b/docs/IE_DG/supported_plugins/GPU.md index 1c4c17430bf..f96e3fcba99 100644 --- a/docs/IE_DG/supported_plugins/GPU.md +++ b/docs/IE_DG/supported_plugins/GPU.md @@ -122,6 +122,11 @@ When specifying key values as raw strings (that is, when using Python API), omit | `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file. **Deprecated**. Will be removed in the next release | | `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release | +## Querying GPU specific metric keys +* MEMORY_STATISTICS : Returns overall memory statistics of `GPU` device allocated by engine with allocation types. If the network has `TensorIterator` or `Loop` operation which is not unrolled, there will be additional allocation at the first inference phase. In such a case, querying for `MEMORY_STATISTICS` should be done after first inference for more accurate result. The code below demonstrates how to query overall memory statistics of `GPU` device: + +@snippet snippets/GPU_Metric0.cpp part0 + ## GPU Context and Video Memory Sharing RemoteBlob API See [RemoteBlob API of GPU Plugin](GPU_RemoteBlob_API.md) diff --git a/docs/snippets/InferenceEngine_QueryAPI6.cpp b/docs/snippets/GPU_Metric0.cpp similarity index 61% rename from docs/snippets/InferenceEngine_QueryAPI6.cpp rename to docs/snippets/GPU_Metric0.cpp index b13812107bf..77de1bbcdbf 100644 --- a/docs/snippets/InferenceEngine_QueryAPI6.cpp +++ b/docs/snippets/GPU_Metric0.cpp @@ -2,11 +2,11 @@ int main() { using namespace InferenceEngine; -//! [part6] InferenceEngine::Core core; auto network = core.ReadNetwork("sample.xml"); auto exeNetwork = core.LoadNetwork(network, "GPU"); -std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)); -//! [part6] +std::map<std::string, uint64_t> statistics_map = core.GetMetric("GPU", GPU_METRIC_KEY(MEMORY_STATISTICS)); +//! 
[part0] return 0; } diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index df5a68b6518..fe1d2724f1d 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -176,6 +176,25 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine: } } +void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateStatistics"); + { + std::lock_guard lock(engine_mutex); + + std::map statistics; + auto impl = getContextImpl(context); + impl->acquire_lock(); + std::shared_ptr eng = impl->GetEngine(); + statistics = eng->get_memory_statistics(); + impl->release_lock(); + + // if the same context exists, the statistics is replaced with the latest one + // (currently, memory usage is accumulated for several networks in the same context) + // if it does not exist, a new statistics is added + statistics_map[context] = statistics; + } +} + std::map clDNNEngine::ConvertPerfHintsToConfig( const std::map& network_config, const CLDNNPlugin::Config& plugin_config) const { @@ -258,7 +277,9 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE auto transformedNetwork = CloneAndTransformNetwork(network, conf); { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork"); - return std::make_shared(transformedNetwork, context, conf); + CLDNNExecNetwork::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); + UpdateStatistics(context); + return exeNetwork; } } @@ -643,6 +664,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 2); IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range); + } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) { + std::map statistics; + for (auto const &item : statistics_map) { + // Before 
collecting memory statistics of each context, it's updated with the latest memory statistics from engine. + UpdateStatistics(item.first); + for (auto const &kv : item.second) { + if (!statistics.count(kv.first)) { + statistics[kv.first] = kv.second; + } else { + statistics[kv.first] += kv.second; + } + } + } + IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics); } else { IE_THROW() << "Unsupported metric key " << name; } diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.h b/inference-engine/src/cldnn_engine/cldnn_engine.h index 0818e136967..1e2cb5cf815 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.h +++ b/inference-engine/src/cldnn_engine/cldnn_engine.h @@ -25,7 +25,9 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin, // key: device_id, value: cldnn device std::map device_map; - std::mutex engine_mutex; + // key: cldnn context, value: memory statistics + mutable std::map> statistics_map; + mutable std::mutex engine_mutex; mutable CLDNNRemoteCLContext::Ptr m_defaultContext; @@ -38,6 +40,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin, void RegisterPrimitives(); void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map ¶ms) const; + void UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const; public: clDNNEngine(); diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp index 7e465f0f257..8c1eeef3e71 100644 --- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -155,7 +155,6 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); - metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS)); 
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { std::vector configKeys; @@ -167,16 +166,6 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY)) nr *= 2; IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr); - } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) { - std::map statistics; - if (m_context != nullptr) { - auto impl = getContextImpl(m_context); - impl->acquire_lock(); - std::shared_ptr eng = impl->GetEngine(); - eng->get_memory_statistics(&statistics); - impl->release_lock(); - } - IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics); } else { IE_THROW() << "Unsupported ExecutableNetwork metric: " << name; } diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp index 38c4e5a808e..9452c2679c9 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp @@ -153,6 +153,189 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values("GPU") ); +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + InferenceEngine::ExecutableNetwork exec_net = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t = p; + + ASSERT_FALSE(t.empty()); + std::cout << "Memory Statistics: " << std::endl; + for (auto &&kv : t) { + ASSERT_NE(kv.second, 0); + std::cout << kv.first << ": " << 
kv.second << " bytes" << std::endl; + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + + ASSERT_FALSE(t1.empty()); + for (auto &&kv : t1) { + ASSERT_NE(kv.second, 0); + } + + InferenceEngine::ExecutableNetwork exec_net2 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + auto iter = t1.find(kv.first); + if (iter != t1.end()) { + ASSERT_EQ(kv.second, t1[kv.first] * 2); + } + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + ASSERT_TRUE(t1.empty()); + + { + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + 
ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + } + { + InferenceEngine::ExecutableNetwork exec_net2 = ie.LoadNetwork(actualCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t3 = p; + + ASSERT_FALSE(t3.empty()); + for (auto &&kv : t3) { + ASSERT_NE(kv.second, 0); + } + } + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t4 = p; + + ASSERT_FALSE(t4.empty()); + for (auto &&kv : t4) { + ASSERT_NE(kv.second, 0); + if (kv.first.find("_cur") != std::string::npos) { + auto iter = t2.find(kv.first); + if (iter != t2.end()) { + ASSERT_EQ(t2[kv.first], kv.second); + } + } + } + } + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t5 = p; + + ASSERT_FALSE(t5.empty()); + for (auto &&kv : t5) { + if (kv.first.find("_cur") != std::string::npos) { + ASSERT_EQ(kv.second, 0); + } + } + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + std::atomic counter{0u}; + std::vector threads(2); + // key: thread id, value: executable network + std::map exec_net_map; + std::vector networks; + networks.emplace_back(simpleCnnNetwork); + networks.emplace_back(simpleCnnNetwork); + + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, 
GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + + ASSERT_FALSE(t1.empty()); + for (auto &&kv : t1) { + ASSERT_NE(kv.second, 0); + } + + for (auto & thread : threads) { + thread = std::thread([&](){ + auto value = counter++; + exec_net_map[value] = ie.LoadNetwork(networks[value], deviceName); + }); + } + + for (auto & thread : threads) { + if (thread.joinable()) { + thread.join(); + } + } + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + auto iter = t1.find(kv.first); + if (iter != t1.end()) { + ASSERT_EQ(kv.second, t1[kv.first] * 3); + } + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS, + ::testing::Values("GPU") +); + // // IE Class GetConfig // @@ -162,27 +345,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values("GPU") ); -using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = BehaviorTestsUtils::IEClassBaseTestP; -TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) { - InferenceEngine::Core ie = BehaviorTestsUtils::createIECoreWithTemplate(); - InferenceEngine::Parameter p; - - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleCnnNetwork, deviceName); - - ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS))); - std::map t = p; - - std::cout << "Memory Statistics: " << std::endl; - for (auto &&kv : t) { - std::cout << kv.first << ": " << kv.second << " bytes" << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P( - nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, - ::testing::Values("GPU") -); - // IE Class Query network INSTANTIATE_TEST_SUITE_P( diff --git 
a/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp index 7213021011c..f1731a857f2 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "base/behavior_test_utils.hpp" #include "common_test_utils/common_utils.hpp" diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp index a40d6b7ed91..8114009dd9a 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp @@ -108,7 +108,7 @@ public: /// Returns statistics of GPU memory allocated by engine in current process for all allocation types. 
/// @note It contains information about both current and peak memory usage - void get_memory_statistics(std::map* statistics) const; + std::map get_memory_statistics() const; /// Adds @p bytes count to currently used memory size of the specified allocation @p type void add_memory_used(uint64_t bytes, allocation_type type); diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp index d9c4bf32308..7e40a4ebf52 100644 --- a/inference-engine/thirdparty/clDNN/runtime/engine.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp @@ -149,17 +149,19 @@ uint64_t engine::get_used_device_memory(allocation_type type) const { return memory_usage; } -void engine::get_memory_statistics(std::map* statistics) const { +std::map engine::get_memory_statistics() const { + std::map statistics; for (auto const& m : _memory_usage_map) { std::ostringstream oss; oss << m.first << "_current"; - (*statistics)[oss.str()] = m.second.load(); + statistics[oss.str()] = m.second.load(); } for (auto const& m : _peak_memory_usage_map) { std::ostringstream oss; oss << m.first << "_peak"; - (*statistics)[oss.str()] = m.second.load(); + statistics[oss.str()] = m.second.load(); } + return statistics; } void engine::add_memory_used(size_t bytes, allocation_type type) {