early experiments with model memory footprint

myshevts 2021-09-24 14:05:24 +03:00
parent f1b8e586ac
commit f27f922e19
6 changed files with 43 additions and 0 deletions

View File

@@ -420,6 +420,9 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
auto & deviceName = metaDevice.deviceName;
auto & deviceConfig = metaDevice.config;
// network.serialize("out_orig.xml", "out_orig.bin");
auto executableNetworkForDeviceBatch1 = GetCore()->LoadNetwork(CNNNetwork{network}, deviceName, deviceConfig);
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
@@ -445,6 +448,8 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
if (executableNetworkForDevice == nullptr)
IE_THROW(NotFound) << "Failed to load Executable network the device "
<< "that the BATCH device is initialized to work with";
footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! (BATCHED):" << footprint << std::endl;
auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);

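The hunk above loads the network twice (the original batch-1 version and the reshaped batched version) and prints the new GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT) value for each. A minimal sketch of reading the same metric from application code; the model path, the bare "GPU" device, and the explicit as<uint64_t>() unwrapping are illustrative assumptions, not part of this commit:

```cpp
// Sketch only: query the experimental NETWORK_MEM_FOOTPRINT metric of a compiled network.
// "model.xml" and the plain "GPU" device string are placeholders.
#include <inference_engine.hpp>
#include <gpu/gpu_config.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");
    auto exec = core.LoadNetwork(network, "GPU");
    // GetMetric() returns an InferenceEngine::Parameter; unwrap it explicitly.
    auto bytes = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "Compiled network footprint: " << bytes << " bytes" << std::endl;
    return 0;
}
```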
View File

@@ -160,6 +160,11 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
nr *= 2;
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
} else if (name == GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)) {
uint64_t f = 0;
for (const auto& g : m_graphs)
f += g->get_mem_footprint();
IE_SET_METRIC_RETURN(GPU_NETWORK_MEM_FOOTPRINT, f);
} else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
std::map<std::string, uint64_t> statistics;
if (m_context != nullptr) {

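This hunk answers the new metric by summing get_mem_footprint() over all per-stream graphs in m_graphs, right next to the existing GPU_METRIC_KEY(MEMORY_STATISTICS) handler. A rough application-side fragment that queries both, reusing the exec object from the previous sketch; the std::map<std::string, uint64_t> unwrap type for MEMORY_STATISTICS is an assumption based on its declaration in gpu_config.hpp:

```cpp
// Sketch only: contrast the per-network footprint with the per-context statistics.
auto footprint = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
auto stats = exec.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS))
                 .as<std::map<std::string, uint64_t>>();
std::cout << "network footprint: " << footprint << " bytes" << std::endl;
for (const auto& kv : stats)
    std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
```

Because the footprint is accumulated over every graph in m_graphs, with multiple GPU streams it should reflect the memory of each stream's copy of the network, not just one.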
View File

@@ -46,8 +46,15 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr
, m_config(config)
, m_stream_id(stream_id)
, m_state(0) {
auto memory_usage_before = get_current_engine_mem_footprint();
std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;
m_program = std::make_shared<Program>(network, GetEngine(), m_config);
Build();
auto memory_usage_after = get_current_engine_mem_footprint();
std::cout << "mem_used After: " << memory_usage_after << " total: " << GetEngine()->get_device_info().max_alloc_mem_size << std::endl;
m_footprint = memory_usage_after - memory_usage_before;
}
CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
@@ -57,7 +64,23 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
, m_config(graph->m_config)
, m_stream_id(stream_id)
, m_state(0) {
auto memory_usage_before = get_current_engine_mem_footprint();
std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;
Build();
auto memory_usage_after = get_current_engine_mem_footprint();
std::cout << "mem_used After: " << memory_usage_after << std::endl;
m_footprint = memory_usage_after - memory_usage_before;
}
uint64_t CLDNNGraph::get_current_engine_mem_footprint() const {
uint64_t memory_usage {0};
auto engine = GetEngine();
for (int t = static_cast<int>(cldnn::allocation_type::unknown);
static_cast<cldnn::allocation_type>(t) <= cldnn::allocation_type::usm_device; t++)
memory_usage += engine->get_used_device_memory(static_cast<cldnn::allocation_type>(t));
return memory_usage;
}
void CLDNNGraph::UpdateLayersMaps() {

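In both graph constructors the footprint is taken as the difference in the engine's used device memory before and after Build(), and get_current_engine_mem_footprint() accumulates that usage by casting through the numeric range of cldnn::allocation_type. A sketch of an equivalent helper that lists the allocation types explicitly instead of relying on the enumerators being contiguous; the exact list (cl_mem, usm_host, usm_shared, usm_device) is an assumption about this clDNN revision:

```cpp
// Sketch only: same accumulation without iterating over the raw enum range.
// The enumerator list below is assumed, not taken from this commit.
uint64_t CLDNNGraph::get_current_engine_mem_footprint() const {
    static const cldnn::allocation_type kTypes[] = {
        cldnn::allocation_type::unknown,
        cldnn::allocation_type::cl_mem,
        cldnn::allocation_type::usm_host,
        cldnn::allocation_type::usm_shared,
        cldnn::allocation_type::usm_device,
    };
    uint64_t memory_usage = 0;
    for (auto t : kTypes)
        memory_usage += GetEngine()->get_used_device_memory(t);
    return memory_usage;
}
```

Either way the measurement is engine-wide, so if other networks are compiled on the same engine at the same time the before/after delta may include their allocations as well.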
View File

@@ -56,6 +56,7 @@ public:
InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
std::string MapOutputName(std::string outName) const;
std::string getName() const { return m_networkName; }
uint64_t get_mem_footprint() const { return m_footprint; }
void wait(Stage stage_mask) {
std::unique_lock<std::mutex> lock(m_infer_mutex);
m_cv.wait(lock, [&] {
@@ -78,6 +79,7 @@ protected:
std::string m_networkName;
Config m_config;
uint64_t m_footprint = {0};
InferenceEngine::gpu::ClContext::Ptr m_context;
std::vector<std::shared_ptr<cldnn::network>> m_networks;
@@ -99,6 +101,7 @@ protected:
void UpdateImplementationsMap();
std::shared_ptr<ngraph::Function> GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& pi,
bool filter_const_primitives = true);
uint64_t get_current_engine_mem_footprint() const;
};
} // namespace CLDNNPlugin

View File

@@ -35,6 +35,12 @@ namespace Metrics {
*/
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
/**
* @brief Metric which reports the device memory footprint, in bytes, occupied by the compiled (executable) network
*/
DECLARE_GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT, uint64_t);
/**
* @brief Metric to get microarchitecture identifier in major.minor.revision format
*/

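The new key is declared alongside the existing GPU metrics, but this commit does not show it being added to the plugin's SUPPORTED_METRICS list, so defensive application code might check availability before querying it. A sketch reusing exec from the earlier example (needs <algorithm>, <string>, and <vector>):

```cpp
// Sketch only: query the experimental metric only if the plugin advertises it.
auto supported = exec.GetMetric(METRIC_KEY(SUPPORTED_METRICS))
                     .as<std::vector<std::string>>();
if (std::find(supported.begin(), supported.end(),
              std::string(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT))) != supported.end()) {
    auto bytes = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "footprint: " << bytes << " bytes" << std::endl;
}
```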
View File

@@ -255,6 +255,7 @@ DECLARE_CONFIG_VALUE(NO);
* @brief Auto-batching to the `#batch`.
*/
DECLARE_CONFIG_KEY(AUTO_BATCH);
DECLARE_CONFIG_KEY(AUTO_BATCH_TIMEOUT);
/**
* @brief Limit `#threads` that are used by Inference Engine for inference on the CPU.
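The last hunk only declares a second config key, AUTO_BATCH_TIMEOUT, without a doc comment or a documented value format. A sketch of how the pair might be passed when loading through the experimental auto-batching path; both the "BATCH:GPU(4)" device string and the millisecond interpretation of the timeout value are assumptions for illustration, not something this commit establishes:

```cpp
// Sketch only: device string and timeout units are assumed, not documented here.
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");  // hypothetical model
auto exec = core.LoadNetwork(network, "BATCH:GPU(4)",
                             {{CONFIG_KEY(AUTO_BATCH_TIMEOUT), "1000"}});
```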