early experiments with model memory footprint

myshevts 2021-09-24 14:05:24 +03:00
parent f1b8e586ac
commit f27f922e19
6 changed files with 43 additions and 0 deletions

View File

@@ -420,6 +420,9 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
auto & deviceName = metaDevice.deviceName;
auto & deviceConfig = metaDevice.config;
// network.serialize("out_orig.xml", "out_orig.bin");
auto executableNetworkForDeviceBatch1 = GetCore()->LoadNetwork(CNNNetwork{network}, deviceName, deviceConfig);
uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;
CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
@@ -445,6 +448,8 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
if (executableNetworkForDevice == nullptr)
IE_THROW(NotFound) << "Failed to load Executable network the device "
<< "that the BATCH device is initialized to work with";
footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
std::cout << "!!!!!!!!!!!!!! (BATCHED):" << footprint << std::endl;
auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);

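The hunk above loads the network twice (the original batch-1 version and the reshaped batched version) and prints the new GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT) value for each. A minimal sketch of reading the same metric from application code; the model path, the bare "GPU" device, and the explicit as<uint64_t>() unwrapping are illustrative assumptions, not part of this commit:

```cpp
// Sketch only: query the experimental NETWORK_MEM_FOOTPRINT metric of a compiled network.
// "model.xml" and the plain "GPU" device string are placeholders.
#include <inference_engine.hpp>
#include <gpu/gpu_config.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");
    auto exec = core.LoadNetwork(network, "GPU");
    // GetMetric() returns an InferenceEngine::Parameter; unwrap it explicitly.
    auto bytes = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "Compiled network footprint: " << bytes << " bytes" << std::endl;
    return 0;
}
```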
View File

@@ -160,6 +160,11 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
nr *= 2;
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
} else if (name == GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)) {
uint64_t f = 0;
for (const auto& g : m_graphs)
f += g->get_mem_footprint();
IE_SET_METRIC_RETURN(GPU_NETWORK_MEM_FOOTPRINT, f);
} else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
std::map<std::string, uint64_t> statistics;
if (m_context != nullptr) {

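This hunk answers the new metric by summing get_mem_footprint() over all per-stream graphs in m_graphs, right next to the existing GPU_METRIC_KEY(MEMORY_STATISTICS) handler. A rough application-side fragment that queries both, reusing the exec object from the previous sketch; the std::map<std::string, uint64_t> unwrap type for MEMORY_STATISTICS is an assumption based on its declaration in gpu_config.hpp:

```cpp
// Sketch only: contrast the per-network footprint with the per-context statistics.
auto footprint = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
auto stats = exec.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS))
                 .as<std::map<std::string, uint64_t>>();
std::cout << "network footprint: " << footprint << " bytes" << std::endl;
for (const auto& kv : stats)
    std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
```

Because the footprint is accumulated over every graph in m_graphs, with multiple GPU streams it should reflect the memory of each stream's copy of the network, not just one.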
View File

@@ -46,8 +46,15 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr
, m_config(config)
, m_stream_id(stream_id)
, m_state(0) {
auto memory_usage_before = get_current_engine_mem_footprint();
std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;
m_program = std::make_shared<Program>(network, GetEngine(), m_config);
Build();
auto memory_usage_after = get_current_engine_mem_footprint();
std::cout << "mem_used After: " << memory_usage_after << " total: " << GetEngine()->get_device_info().max_alloc_mem_size << std::endl;
m_footprint = memory_usage_after - memory_usage_before;
}
CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
@@ -57,7 +64,23 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
, m_config(graph->m_config)
, m_stream_id(stream_id)
, m_state(0) {
auto memory_usage_before = get_current_engine_mem_footprint();
std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;
Build();
auto memory_usage_after = get_current_engine_mem_footprint();
std::cout << "mem_used After: " << memory_usage_after << std::endl;
m_footprint = memory_usage_after - memory_usage_before;
}
uint64_t CLDNNGraph::get_current_engine_mem_footprint() const {
uint64_t memory_usage {0};
auto engine = GetEngine();
for (int t = static_cast<int>(cldnn::allocation_type::unknown);
static_cast<cldnn::allocation_type>(t) <= cldnn::allocation_type::usm_device; t++)
memory_usage += engine->get_used_device_memory(static_cast<cldnn::allocation_type>(t));
return memory_usage;
}
void CLDNNGraph::UpdateLayersMaps() {

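In both graph constructors the footprint is taken as the difference in the engine's used device memory before and after Build(), and get_current_engine_mem_footprint() accumulates that usage by casting through the numeric range of cldnn::allocation_type. A sketch of an equivalent helper that lists the allocation types explicitly instead of relying on the enumerators being contiguous; the exact list (cl_mem, usm_host, usm_shared, usm_device) is an assumption about this clDNN revision:

```cpp
// Sketch only: same accumulation without iterating over the raw enum range.
// The enumerator list below is assumed, not taken from this commit.
uint64_t CLDNNGraph::get_current_engine_mem_footprint() const {
    static const cldnn::allocation_type kTypes[] = {
        cldnn::allocation_type::unknown,
        cldnn::allocation_type::cl_mem,
        cldnn::allocation_type::usm_host,
        cldnn::allocation_type::usm_shared,
        cldnn::allocation_type::usm_device,
    };
    uint64_t memory_usage = 0;
    for (auto t : kTypes)
        memory_usage += GetEngine()->get_used_device_memory(t);
    return memory_usage;
}
```

Either way the measurement is engine-wide, so if other networks are compiled on the same engine at the same time the before/after delta may include their allocations as well.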
View File

@@ -56,6 +56,7 @@ public:
InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
std::string MapOutputName(std::string outName) const;
std::string getName() const { return m_networkName; }
uint64_t get_mem_footprint() const { return m_footprint; }
void wait(Stage stage_mask) {
std::unique_lock<std::mutex> lock(m_infer_mutex);
m_cv.wait(lock, [&] {
@@ -78,6 +79,7 @@ protected:
std::string m_networkName;
Config m_config;
uint64_t m_footprint = {0};
InferenceEngine::gpu::ClContext::Ptr m_context;
std::vector<std::shared_ptr<cldnn::network>> m_networks;
@@ -99,6 +101,7 @@ protected:
void UpdateImplementationsMap();
std::shared_ptr<ngraph::Function> GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& pi,
bool filter_const_primitives = true);
uint64_t get_current_engine_mem_footprint() const;
};
} // namespace CLDNNPlugin

View File

@@ -35,6 +35,12 @@ namespace Metrics {
*/
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
/**
* @brief Metric which reports the device memory footprint, in bytes, occupied by the compiled (executable) network
*/
DECLARE_GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT, uint64_t);
/**
* @brief Metric to get microarchitecture identifier in major.minor.revision format
*/

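The new key is declared alongside the existing GPU metrics, but this commit does not show it being added to the plugin's SUPPORTED_METRICS list, so defensive application code might check availability before querying it. A sketch reusing exec from the earlier example (needs <algorithm>, <string>, and <vector>):

```cpp
// Sketch only: query the experimental metric only if the plugin advertises it.
auto supported = exec.GetMetric(METRIC_KEY(SUPPORTED_METRICS))
                     .as<std::vector<std::string>>();
if (std::find(supported.begin(), supported.end(),
              std::string(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT))) != supported.end()) {
    auto bytes = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
    std::cout << "footprint: " << bytes << " bytes" << std::endl;
}
```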
View File

@@ -255,6 +255,7 @@ DECLARE_CONFIG_VALUE(NO);
* @brief Auto-batching to the `#batch`.
*/
DECLARE_CONFIG_KEY(AUTO_BATCH);
DECLARE_CONFIG_KEY(AUTO_BATCH_TIMEOUT);
/**
* @brief Limit `#threads` that are used by Inference Engine for inference on the CPU.
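The last hunk only declares a second config key, AUTO_BATCH_TIMEOUT, without a doc comment or a documented value format. A sketch of how the pair might be passed when loading through the experimental auto-batching path; both the "BATCH:GPU(4)" device string and the millisecond interpretation of the timeout value are assumptions for illustration, not something this commit establishes:

```cpp
// Sketch only: device string and timeout units are assumed, not documented here.
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");  // hypothetical model
auto exec = core.LoadNetwork(network, "BATCH:GPU(4)",
                             {{CONFIG_KEY(AUTO_BATCH_TIMEOUT), "1000"}});
```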