quick-n-dirty batch footprint vs device total mem

myshevts 2021-10-11 15:50:44 +03:00
parent 776624ff2a
commit 1c98dbe244
2 changed files with 15 additions and 5 deletions

View File

@ -419,10 +419,6 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
auto & deviceName = metaDevice.deviceName;
auto & deviceConfig = metaDevice.config;
// network.serialize("out_orig.xml", "out_orig.bin");
auto executableNetworkForDeviceBatch1 = GetCore()->LoadNetwork(CNNNetwork{network}, deviceName, deviceConfig);
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
@ -448,7 +444,7 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
if (executableNetworkForDevice == nullptr)
IE_THROW(NotFound) << "Failed to load Executable network for the device "
<< "that the BATCH device is initialized to work with";
footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
uint64_t footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! (BATCHED):" << footprint << std::endl;
auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
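The hunk above drops the batch-1 footprint probe from the AutoBatch plugin and keeps only the query against the already-batched network. A minimal application-level sketch of that query, assuming the branch-specific NETWORK_MEM_FOOTPRINT GPU metric key used in the hunk and a hypothetical model path:

#include <ie_core.hpp>
#include <gpu/gpu_config.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    // Hypothetical model; any network loadable on GPU would do.
    auto network = core.ReadNetwork("model.xml");
    auto execNet = core.LoadNetwork(network, "GPU");
    // NETWORK_MEM_FOOTPRINT is the GPU metric this branch introduces for the loaded network.
    uint64_t footprint = execNet.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "Batched network footprint (bytes): " << footprint << std::endl;
    return 0;
}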

View File

@ -695,8 +695,22 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
} else if (memPressure.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
batch = 4;
}
// workaround to emulate the MAX_BATCH
auto executableNetworkForDeviceBatch1 = const_cast<clDNNEngine*>(this)->LoadExeNetworkImpl(*network,
{{PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, "1"}});
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "memPressure.max_mem_tolerance: " << memPressure.max_mem_tolerance << std::endl;
std::cout << "SELECTED BATCH: " << batch << std::endl;
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
uint64_t total_mem = GetMetric(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE), {{}});
total_mem /= 2; // WA to accommodate #streams
while (total_mem < (footprint * batch)) {
batch /= 2;
}
// TODO: remove this workaround and avoid batching altogether if the network is too big (should happen in IE core)
batch = std::max(1u, batch);
std::cout << "ACTUAL SELECTED BATCH: " << batch << std::endl;
IE_SET_METRIC_RETURN(OPTIMAL_BATCH, batch);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
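
The block added to GetMetric above caps the OPTIMAL_BATCH guess by the measured footprint. A self-contained sketch of that heuristic; the helper name and its factoring into a free function are illustrative, not part of the commit:

#include <algorithm>
#include <cstdint>

// Cap the candidate batch so that footprint * batch fits into roughly half of the
// device memory; the halving mirrors the "#streams" workaround in the hunk above.
static unsigned capBatchByMemory(unsigned batch, uint64_t footprint, uint64_t total_mem) {
    total_mem /= 2;                         // WA to accommodate #streams
    while (batch > 1 && total_mem < footprint * batch)
        batch /= 2;                         // keep halving until the batched network fits
    return std::max(1u, batch);             // never report a batch below 1
}

The committed loop lets batch fall to 0 before clamping with std::max; stopping at 1 in the sketch yields the same result without the extra iteration.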