[GPU] Add real kernels' execution timings collection for DumpProfilingData debug option (#15797)

This commit is contained in:
Sergey Shlyapnikov
2023-04-25 14:33:08 +04:00
committed by GitHub
parent 1aec450fc6
commit 0a5975bdfa
2 changed files with 17 additions and 1 deletions

View File

@@ -161,17 +161,24 @@ public:
/// Reports the duration of this stage to the profiled object when the stage goes out of scope.
///
/// Does nothing unless profiling is enabled. The reported value (in microseconds) is the
/// duration supplied through set_custom_stage_duration() when that is non-zero after
/// conversion to microseconds; otherwise it is the host-side time between _start and
/// _finish measured with std::chrono::high_resolution_clock.
~profiled_stage() {
    GPU_DEBUG_IF(profiling_enabled) {
        using us = std::chrono::microseconds;
        _finish = std::chrono::high_resolution_clock::now();
        const auto stage_duration = std::chrono::duration_cast<us>(_finish - _start).count();
        // duration_cast truncates toward zero, so a custom duration shorter than 1 us
        // becomes 0 here and the host-side stage duration is reported instead.
        const auto custom_stage_duration = std::chrono::duration_cast<us>(custom_duration).count();
        // Single definition of total_duration: the previous unconditional computation from
        // (_finish - _start) is superseded by this selection and must not be declared as well.
        auto total_duration = custom_stage_duration == 0 ? stage_duration
                                                         : custom_stage_duration;
        _obj.add_profiling_data(_stage, cache_hit, total_duration);
    }
}
/// Sets the cache-hit flag that the destructor passes to add_profiling_data().
/// Calling it without an argument sets the flag to true.
void set_cache_hit(bool val = true) {
    cache_hit = val;
}
/// Stores an externally supplied duration for this stage. When it is non-zero after
/// conversion to microseconds, the destructor reports it instead of the host-side
/// time between _start and _finish.
void set_custom_stage_duration(std::chrono::nanoseconds duration) {
    custom_duration = duration;
}
private:
// Checked by the destructor: profiling data is reported only when this is true.
bool profiling_enabled = false;
// Start and end time points of the stage. The destructor assigns _finish and uses
// (_finish - _start) as the host-side stage duration.
// NOTE(review): _start is presumably captured where the stage object is constructed
// (not visible in this fragment) — confirm.
std::chrono::high_resolution_clock::time_point _start = {};
std::chrono::high_resolution_clock::time_point _finish = {};
// Duration supplied through set_custom_stage_duration(); value-initialized to zero.
std::chrono::nanoseconds custom_duration = {};
// Object whose add_profiling_data() receives the stage, cache-hit flag and duration.
ProfiledObjectType& _obj;
// Pipeline stage identifier passed to add_profiling_data().
instrumentation::pipeline_stage _stage;
// Cache-hit flag set through set_cache_hit() and passed to add_profiling_data().
bool cache_hit = false;

View File

@@ -523,6 +523,15 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) {
get_network().get_stream().wait_for_events({ev});
if (ev != nullptr) {
auto profiling_info = ev->get_profiling_info();
for (const auto &interval : profiling_info) {
if (interval.stage == cldnn::instrumentation::profiling_stage::executing) {
GPU_DEBUG_CODE(stage_prof.set_custom_stage_duration(interval.value->value()));
}
}
}
}
return ev;