quick-n-dirty batch footprint vs device total mem

myshevts 2021-10-11 15:50:44 +03:00
parent 776624ff2a
commit 1c98dbe244
2 changed files with 15 additions and 5 deletions

View File

@ -419,10 +419,6 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
auto & deviceName = metaDevice.deviceName;
auto & deviceConfig = metaDevice.config;
// network.serialize("out_orig.xml", "out_orig.bin");
auto executableNetworkForDeviceBatch1 = GetCore()->LoadNetwork(CNNNetwork{network}, deviceName, deviceConfig);
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
@ -448,7 +444,7 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
if (executableNetworkForDevice == nullptr)
IE_THROW(NotFound) << "Failed to load Executable network for the device "
<< "that the BATCH device is initialized to work with";
footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
uint64_t footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! (BATCHED):" << footprint << std::endl;
auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
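The hunk above drops the batch-1 footprint probe from the AutoBatch plugin and keeps only the query against the already-batched network. A minimal application-level sketch of that query, assuming the branch-specific NETWORK_MEM_FOOTPRINT GPU metric key used in the hunk and a hypothetical model path:

#include <ie_core.hpp>
#include <gpu/gpu_config.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    // Hypothetical model; any network loadable on GPU would do.
    auto network = core.ReadNetwork("model.xml");
    auto execNet = core.LoadNetwork(network, "GPU");
    // NETWORK_MEM_FOOTPRINT is the GPU metric this branch introduces for the loaded network.
    uint64_t footprint = execNet.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "Batched network footprint (bytes): " << footprint << std::endl;
    return 0;
}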

View File

@ -695,8 +695,22 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
} else if (memPressure.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
batch = 4;
}
// workaround to emulate the MAX_BATCH
auto executableNetworkForDeviceBatch1 = const_cast<clDNNEngine*>(this)->LoadExeNetworkImpl(*network,
{{PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, "1"}});
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "memPressure.max_mem_tolerance: " << memPressure.max_mem_tolerance << std::endl;
std::cout << "SELECTED BATCH: " << batch << std::endl;
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
uint64_t total_mem = GetMetric(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE), {{}});
total_mem /= 2; // WA to accommodate #streams
while (total_mem < (footprint * batch)) {
batch /= 2;
}
// TODO: remove this workaround and avoid batching altogether if the network is too big (should happen in IE core)
batch = std::max(1u, batch);
std::cout << "ACTUAL SELECTED BATCH: " << batch << std::endl;
IE_SET_METRIC_RETURN(OPTIMAL_BATCH, batch);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
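
The block added to GetMetric above caps the OPTIMAL_BATCH guess by the measured footprint. A self-contained sketch of that heuristic; the helper name and its factoring into a free function are illustrative, not part of the commit:

#include <algorithm>
#include <cstdint>

// Cap the candidate batch so that footprint * batch fits into roughly half of the
// device memory; the halving mirrors the "#streams" workaround in the hunk above.
static unsigned capBatchByMemory(unsigned batch, uint64_t footprint, uint64_t total_mem) {
    total_mem /= 2;                         // WA to accommodate #streams
    while (batch > 1 && total_mem < footprint * batch)
        batch /= 2;                         // keep halving until the batched network fits
    return std::max(1u, batch);             // never report a batch below 1
}

The committed loop lets batch fall to 0 before clamping with std::max; stopping at 1 in the sketch yields the same result without the extra iteration.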