From c00b0b6ae402c781a19cacf966fd4ac379ec5370 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Thu, 30 Sep 2021 10:54:27 +0300 Subject: [PATCH] [GPU] Profiling refactoring and fixes (#7714) --- .../src/cldnn_engine/cldnn_graph.cpp | 12 +++++----- .../clDNN/api/cldnn/runtime/profiling.hpp | 12 ++++++++-- .../clDNN/runtime/ocl/ocl_base_event.hpp | 2 +- .../clDNN/runtime/ocl/ocl_event.cpp | 24 +++++++++---------- .../clDNN/runtime/ocl/ocl_user_event.cpp | 2 +- .../src/impls/onednn/primitive_onednn_base.h | 9 +++---- .../thirdparty/clDNN/src/kernel_runner.cpp | 2 +- 7 files changed, 36 insertions(+), 27 deletions(-) diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 945e795e7d8..516ac96d9f9 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -466,11 +466,11 @@ void CLDNNGraph::UpdatePerfStatistics() { using duration_t = std::chrono::duration; auto count = std::chrono::duration_cast(interval.value->value()).count(); - if (interval.name == "submission") { + if (interval.stage == cldnn::instrumentation::profiling_stage::submission) { pc.cpu_uSec += count; - } else if (interval.name == "executing") { + } else if (interval.stage == cldnn::instrumentation::profiling_stage::executing) { pc.realTime_uSec += count; - } else if (interval.name == "duration") { // "duration" is used for CPU layers + } else if (interval.stage == cldnn::instrumentation::profiling_stage::duration) { // "duration" is used for CPU layers pc.cpu_uSec += count; if (pc.num == 0) @@ -673,11 +673,11 @@ std::map CLDNNGraph::G using duration_t = std::chrono::duration; auto count = std::chrono::duration_cast(interval.value->value()).count(); - if (interval.name == "submission") { + if (interval.stage == cldnn::instrumentation::profiling_stage::submission) { cpuTime += count; - } else if (interval.name == "executing") { + } else if (interval.stage == cldnn::instrumentation::profiling_stage::executing) { deviceTime += count; - } else if (interval.name == "duration") { // "duration" is used for CPU layers + } else if (interval.stage == cldnn::instrumentation::profiling_stage::duration) { // "duration" is used for CPU layers cpuTime += count; } } diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp index 98c18a483f9..20f428fd4ae 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp @@ -17,6 +17,14 @@ namespace instrumentation { /// @addtogroup cpp_event Events Support /// @{ +/// @brief Represents profiling intervals stages. +enum class profiling_stage { + submission, // Time spent on submitting command by the host to the device associated with the commandqueue. + starting, // Time spent on waiting in the commandqueue before execution. + executing, // Time spent on command execution. + duration // Time spent on command execution for CPU layers. +}; + /// @brief Helper class to calculate time periods. template class timer { @@ -55,9 +63,9 @@ private: std::chrono::nanoseconds _value; }; -/// @brief Represents prifiling interval as its name and value. +/// @brief Represents profiling interval as its type and value. struct profiling_interval { - std::string name; ///< @brief Display name. + profiling_stage stage; ///< @brief Display name. std::shared_ptr value; ///< @brief Interval value. }; diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp index ab0c51e5bf3..e34be492173 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp @@ -16,7 +16,7 @@ namespace cldnn { namespace ocl { struct profiling_period_ocl_start_stop { - const char* name; + instrumentation::profiling_stage stage; cl_profiling_info start; cl_profiling_info stop; }; diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_event.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_event.cpp index d40dbf7dfa1..c822bbbcef7 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_event.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_event.cpp @@ -25,10 +25,10 @@ bool is_event_profiled(const cl::Event& event) { return false; } -instrumentation::profiling_interval get_profiling_interval(const char* name, cl_ulong start, cl_ulong end) { +instrumentation::profiling_interval get_profiling_interval(instrumentation::profiling_stage stage, cl_ulong start, cl_ulong end) { auto diff = std::chrono::nanoseconds(end - start); auto period = std::make_shared(diff); - return { name, period }; + return { stage, period }; } } // namespace @@ -71,9 +71,9 @@ bool ocl_event::add_event_handler_impl(event_handler, void*) { } static const std::vector profiling_periods{ - {"submission", CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT}, - {"starting", CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START}, - {"executing", CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END}, + { instrumentation::profiling_stage::submission, CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT }, + { instrumentation::profiling_stage::starting, CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START }, + { instrumentation::profiling_stage::executing, CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END }, }; bool ocl_event::get_profiling_info_impl(std::list& info) { @@ -87,7 +87,7 @@ bool ocl_event::get_profiling_info_impl(std::list>> all_durations; + std::map>> all_durations; for (size_t i = 0; i < _events.size(); i++) { auto be = downcast(_events[i].get()); @@ -129,7 +129,7 @@ bool ocl_events::get_profiling_info_impl(std::list(ev_start), static_cast(ev_end)); - auto& durations = all_durations[period.name]; + auto& durations = all_durations[period.stage]; bool ev_duration_merged = false; auto it = durations.begin(); @@ -173,21 +173,21 @@ bool ocl_events::get_profiling_info_impl(std::listprint_multi_kernel_perf) { - if (0 == strcmp(period.name, "executing")) { + if (period.stage == instrumentation::profiling_stage::executing) { GPU_DEBUG_COUT << "Multi-kernel time: "; - for (auto& duration : all_durations[period.name]) + for (auto& duration : all_durations[period.stage]) std::cout << " " << (duration.second - duration.first) / 1000; std::cout << " Total " << sum / 1000 << std::endl; } } - info.push_back(get_profiling_interval(period.name, 0, sum)); + info.push_back(get_profiling_interval(period.stage, 0, sum)); } return true; diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp index f8d86d0b75e..974025f80db 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp @@ -23,7 +23,7 @@ bool ocl_user_event::get_profiling_info_impl(std::list(_duration->value()); - info.push_back({"duration", period }); + info.push_back({ instrumentation::profiling_stage::executing, period }); return true; } diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/primitive_onednn_base.h b/inference-engine/thirdparty/clDNN/src/impls/onednn/primitive_onednn_base.h index c905c338c40..876f5a4f07c 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/primitive_onednn_base.h +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/primitive_onednn_base.h @@ -115,15 +115,16 @@ protected: auto profiling = engine.configuration().enable_profiling; event::ptr event; - if (profiling) + if (profiling) { stream.finish(); + event = stream.create_user_event(false); + } _prim.execute(stream.get_onednn_stream(), _args); if (profiling) { - // Measure all previously queued tasks - event = stream.enqueue_marker({}, true); - stream.wait_for_events({event}); + stream.finish(); + event->set(); } else { // Create and set user event as complete event = stream.create_user_event(true); diff --git a/inference-engine/thirdparty/clDNN/src/kernel_runner.cpp b/inference-engine/thirdparty/clDNN/src/kernel_runner.cpp index 8f937863b52..b5137ae1d49 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_runner.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_runner.cpp @@ -215,7 +215,7 @@ std::vector kernel_runner::run_kernels(const kernel_se if (event.get() != NULL) { auto profiling_intervals = event->get_profiling_info(); for (auto const& profiling_interval : profiling_intervals) { - if (profiling_interval.name == "executing") { + if (profiling_interval.stage == instrumentation::profiling_stage::executing) { kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time); num_of_runs++; break;