early experiments with model mem footprint
parent f1b8e586ac
commit f27f922e19
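
The change wires a new GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT) through the GPU plugin and reads it from the auto-batching plugin. Below is a minimal, hypothetical usage sketch from application code; it assumes the metric is exported exactly as declared in the header hunk further down, that GPU_METRIC_KEY is available via gpu/gpu_config.hpp, and the model path is a placeholder.

    #include <iostream>
    #include <inference_engine.hpp>
    #include <gpu/gpu_config.hpp>

    int main() {
        InferenceEngine::Core core;
        // "model.xml" is a placeholder path, not part of this commit
        auto network = core.ReadNetwork("model.xml");
        auto exec = core.LoadNetwork(network, "GPU");
        // GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT) expands to the string "GPU_NETWORK_MEM_FOOTPRINT";
        // the metric is declared as uint64_t in the header change below (experimental, may change)
        uint64_t footprint = exec.GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)).as<uint64_t>();
        std::cout << "network mem footprint (bytes): " << footprint << std::endl;
        return 0;
    }
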
@@ -420,6 +420,9 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
    auto & deviceName = metaDevice.deviceName;
    auto & deviceConfig = metaDevice.config;
    // network.serialize("out_orig.xml", "out_orig.bin");
    auto executableNetworkForDeviceBatch1 = GetCore()->LoadNetwork(CNNNetwork{network}, deviceName, deviceConfig);
    uint64_t footprint = executableNetworkForDeviceBatch1->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
    std::cout << "!!!!!!!!!!!!!! Original (batch1):" << footprint << std::endl;

    CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
    const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();

@@ -445,6 +448,8 @@ IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(con
    if (executableNetworkForDevice == nullptr)
        IE_THROW(NotFound) << "Failed to load Executable network for the device "
                           << "that the BATCH device is initialized to work with";
    footprint = executableNetworkForDevice->GetMetric(GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT));
    std::cout << "!!!!!!!!!!!!!! (BATCHED):" << footprint << std::endl;

    auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
    bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);

@@ -160,6 +160,11 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
        if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
            nr *= 2;
        IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
    } else if (name == GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT)) {
        uint64_t f = 0;
        for (const auto& g : m_graphs)
            f += g->get_mem_footprint();
        IE_SET_METRIC_RETURN(GPU_NETWORK_MEM_FOOTPRINT, f);
    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
        std::map<std::string, uint64_t> statistics;
        if (m_context != nullptr) {

@@ -46,8 +46,15 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr
    , m_config(config)
    , m_stream_id(stream_id)
    , m_state(0) {
    auto memory_usage_before = get_current_engine_mem_footprint();
    std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;

    m_program = std::make_shared<Program>(network, GetEngine(), m_config);
    Build();

    auto memory_usage_after = get_current_engine_mem_footprint();
    std::cout << "mem_used After: " << memory_usage_after << " total: " << GetEngine()->get_device_info().max_alloc_mem_size << std::endl;
    m_footprint = memory_usage_after - memory_usage_before;
}

CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)

@@ -57,7 +64,23 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
    , m_config(graph->m_config)
    , m_stream_id(stream_id)
    , m_state(0) {
    auto memory_usage_before = get_current_engine_mem_footprint();
    std::cout << "mem_used BEFORE: " << memory_usage_before << std::endl;

    Build();

    auto memory_usage_after = get_current_engine_mem_footprint();
    std::cout << "mem_used After: " << memory_usage_after << std::endl;
    m_footprint = memory_usage_after - memory_usage_before;
}

uint64_t CLDNNGraph::get_current_engine_mem_footprint() const {
    uint64_t memory_usage {0};
    auto engine = GetEngine();
    for (int t = static_cast<int>(cldnn::allocation_type::unknown);
         static_cast<cldnn::allocation_type>(t) <= cldnn::allocation_type::usm_device; t++)
        memory_usage += engine->get_used_device_memory(static_cast<cldnn::allocation_type>(t));
    return memory_usage;
}

void CLDNNGraph::UpdateLayersMaps() {

@@ -56,6 +56,7 @@ public:
    InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
    std::string MapOutputName(std::string outName) const;
    std::string getName() const { return m_networkName; }
    uint64_t get_mem_footprint() const { return m_footprint; }
    void wait(Stage stage_mask) {
        std::unique_lock<std::mutex> lock(m_infer_mutex);
        m_cv.wait(lock, [&] {

@@ -78,6 +79,7 @@ protected:

    std::string m_networkName;
    Config m_config;
    uint64_t m_footprint = {0};

    InferenceEngine::gpu::ClContext::Ptr m_context;
    std::vector<std::shared_ptr<cldnn::network>> m_networks;

@@ -99,6 +101,7 @@ protected:
    void UpdateImplementationsMap();
    std::shared_ptr<ngraph::Function> GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& pi,
                                                                       bool filter_const_primitives = true);
    uint64_t get_current_engine_mem_footprint() const;
};

} // namespace CLDNNPlugin

@@ -35,6 +35,12 @@ namespace Metrics {
 */
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);

/**
 * @brief Metric which reports the device memory footprint, in bytes,
 * occupied by a network loaded onto the GPU device
 */
DECLARE_GPU_METRIC_KEY(NETWORK_MEM_FOOTPRINT, uint64_t);

/**
 * @brief Metric to get microarchitecture identifier in major.minor.revision format
 */

@@ -255,6 +255,7 @@ DECLARE_CONFIG_VALUE(NO);
 * @brief Auto-batching to the `#batch`.
 */
DECLARE_CONFIG_KEY(AUTO_BATCH);
DECLARE_CONFIG_KEY(AUTO_BATCH_TIMEOUT);

/**
 * @brief Limit `#threads` that are used by Inference Engine for inference on the CPU.

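For the config keys in the last hunk, here is a hypothetical sketch of loading a model through the auto-batching device. The "BATCH:GPU(4)" device-string convention and the millisecond timeout value are assumptions about this experimental state, not confirmed by the commit.

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");  // placeholder model path
        // Assumed convention: "BATCH:GPU(4)" batches up to 4 requests on GPU;
        // AUTO_BATCH_TIMEOUT is set to an illustrative value in milliseconds
        auto exec = core.LoadNetwork(network, "BATCH:GPU(4)",
                                     {{CONFIG_KEY(AUTO_BATCH_TIMEOUT), "100"}});
        return 0;
    }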