[GPU] Profiling refactoring and fixes (#7714)

Sergey Shlyapnikov 2021-09-30 10:54:27 +03:00 committed by GitHub
parent 17091476d8
commit c00b0b6ae4
7 changed files with 36 additions and 27 deletions

View File

@@ -466,11 +466,11 @@ void CLDNNGraph::UpdatePerfStatistics() {
using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
if (interval.name == "submission") {
if (interval.stage == cldnn::instrumentation::profiling_stage::submission) {
pc.cpu_uSec += count;
} else if (interval.name == "executing") {
} else if (interval.stage == cldnn::instrumentation::profiling_stage::executing) {
pc.realTime_uSec += count;
} else if (interval.name == "duration") { // "duration" is used for CPU layers
} else if (interval.stage == cldnn::instrumentation::profiling_stage::duration) { // "duration" is used for CPU layers
pc.cpu_uSec += count;
if (pc.num == 0)
@@ -673,11 +673,11 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
if (interval.name == "submission") {
if (interval.stage == cldnn::instrumentation::profiling_stage::submission) {
cpuTime += count;
} else if (interval.name == "executing") {
} else if (interval.stage == cldnn::instrumentation::profiling_stage::executing) {
deviceTime += count;
} else if (interval.name == "duration") { // "duration" is used for CPU layers
} else if (interval.stage == cldnn::instrumentation::profiling_stage::duration) { // "duration" is used for CPU layers
cpuTime += count;
}
}
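Both hunks above now match on interval.stage instead of comparing interval.name strings. A minimal sketch of the accumulation rule, assuming a simplified stand-in for the pc counter record shown above (illustrative names, not the plugin's types):

#include <chrono>
#include <cstdint>

enum class profiling_stage { submission, starting, executing, duration };

struct perf_counter {                   // stand-in for the pc record above
    uint64_t cpu_uSec = 0;
    uint64_t realTime_uSec = 0;
};

inline void accumulate(perf_counter& pc, profiling_stage stage, std::chrono::nanoseconds value) {
    auto count = std::chrono::duration_cast<std::chrono::microseconds>(value).count();
    switch (stage) {
    case profiling_stage::submission:
        pc.cpu_uSec += count;           // host-side submission time is booked as CPU time
        break;
    case profiling_stage::executing:
        pc.realTime_uSec += count;      // device execution time
        break;
    case profiling_stage::duration:
        pc.cpu_uSec += count;           // "duration" is used for CPU layers
        break;
    default:
        break;                          // "starting" is not accumulated here
    }
}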

View File

@@ -17,6 +17,14 @@ namespace instrumentation {
/// @addtogroup cpp_event Events Support
/// @{
/// @brief Represents profiling intervals stages.
enum class profiling_stage {
submission, // Time spent on submitting command by the host to the device associated with the commandqueue.
starting, // Time spent on waiting in the commandqueue before execution.
executing, // Time spent on command execution.
duration // Time spent on command execution for CPU layers.
};
/// @brief Helper class to calculate time periods.
template <class ClockTy = std::chrono::steady_clock>
class timer {
@@ -55,9 +63,9 @@ private:
std::chrono::nanoseconds _value;
};
/// @brief Represents prifiling interval as its name and value.
/// @brief Represents profiling interval as its type and value.
struct profiling_interval {
std::string name; ///< @brief Display name.
profiling_stage stage; ///< @brief Display name.
std::shared_ptr<profiling_period> value; ///< @brief Interval value.
};
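For context, a hedged sketch of the reworked interval type and how a measured span gets wrapped; these are simplified stand-ins mirroring the diff above, not the actual cldnn headers:

#include <chrono>
#include <memory>

enum class profiling_stage { submission, starting, executing, duration };

struct profiling_period {
    virtual std::chrono::nanoseconds value() const = 0;
    virtual ~profiling_period() = default;
};

struct profiling_period_basic : profiling_period {
    explicit profiling_period_basic(std::chrono::nanoseconds v) : _value(v) {}
    std::chrono::nanoseconds value() const override { return _value; }
private:
    std::chrono::nanoseconds _value;
};

struct profiling_interval {
    profiling_stage stage;                      // which part of the pipeline was measured
    std::shared_ptr<profiling_period> value;    // the measured span
};

// Wrap a raw nanosecond span as an "executing" interval.
inline profiling_interval make_executing_interval(std::chrono::nanoseconds span) {
    return { profiling_stage::executing, std::make_shared<profiling_period_basic>(span) };
}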

View File

@@ -16,7 +16,7 @@ namespace cldnn {
namespace ocl {
struct profiling_period_ocl_start_stop {
const char* name;
instrumentation::profiling_stage stage;
cl_profiling_info start;
cl_profiling_info stop;
};

View File

@@ -25,10 +25,10 @@ bool is_event_profiled(const cl::Event& event) {
return false;
}
instrumentation::profiling_interval get_profiling_interval(const char* name, cl_ulong start, cl_ulong end) {
instrumentation::profiling_interval get_profiling_interval(instrumentation::profiling_stage stage, cl_ulong start, cl_ulong end) {
auto diff = std::chrono::nanoseconds(end - start);
auto period = std::make_shared<instrumentation::profiling_period_basic>(diff);
return { name, period };
return { stage, period };
}
} // namespace
@@ -71,9 +71,9 @@ bool ocl_event::add_event_handler_impl(event_handler, void*) {
}
static const std::vector<profiling_period_ocl_start_stop> profiling_periods{
{"submission", CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT},
{"starting", CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START},
{"executing", CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END},
{ instrumentation::profiling_stage::submission, CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT },
{ instrumentation::profiling_stage::starting, CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START },
{ instrumentation::profiling_stage::executing, CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END },
};
bool ocl_event::get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) {
@@ -87,7 +87,7 @@ bool ocl_event::get_profiling_info_impl(std::list<instrumentation::profiling_int
_event.getProfilingInfo(period.start, &start);
_event.getProfilingInfo(period.stop, &end);
info.push_back(get_profiling_interval(period.name, start, end));
info.push_back(get_profiling_interval(period.stage, start, end));
}
return true;
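The profiling_periods table above pairs each stage with two OpenCL event timestamps: QUEUED -> SUBMIT is reported as submission, SUBMIT -> START as starting, and START -> END as executing. A self-contained sketch of that mapping over the four raw nanosecond counters (illustrative names, not the plugin's code):

#include <chrono>
#include <cstdint>
#include <utility>
#include <vector>

enum class profiling_stage { submission, starting, executing, duration };

// Map the four raw OpenCL timestamps of one command (all in nanoseconds) onto
// the three GPU-side stages tracked by the table above.
std::vector<std::pair<profiling_stage, std::chrono::nanoseconds>>
split_stages(uint64_t queued, uint64_t submit, uint64_t start, uint64_t end) {
    return {
        { profiling_stage::submission, std::chrono::nanoseconds(submit - queued) },  // QUEUED -> SUBMIT
        { profiling_stage::starting,   std::chrono::nanoseconds(start - submit) },   // SUBMIT -> START
        { profiling_stage::executing,  std::chrono::nanoseconds(end - start) },      // START  -> END
    };
}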
@@ -114,7 +114,7 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
// For every profiling period (i.e. submission / starting / executing),
// the goal is to sum up all disjoint durations of its projection on the time axis
std::map<std::string, std::vector<std::pair<unsigned long long, unsigned long long>>> all_durations;
std::map<instrumentation::profiling_stage, std::vector<std::pair<unsigned long long, unsigned long long>>> all_durations;
for (size_t i = 0; i < _events.size(); i++) {
auto be = downcast<ocl_event>(_events[i].get());
@@ -129,7 +129,7 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
auto ev_duration = std::make_pair(static_cast<unsigned long long>(ev_start),
static_cast<unsigned long long>(ev_end));
auto& durations = all_durations[period.name];
auto& durations = all_durations[period.stage];
bool ev_duration_merged = false;
auto it = durations.begin();
@@ -173,21 +173,21 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
for (auto& period : profiling_periods) {
unsigned long long sum = 0;
for (auto& duration : all_durations[period.name]) {
for (auto& duration : all_durations[period.stage]) {
sum += (duration.second - duration.first);
}
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->print_multi_kernel_perf) {
if (0 == strcmp(period.name, "executing")) {
if (period.stage == instrumentation::profiling_stage::executing) {
GPU_DEBUG_COUT << "Multi-kernel time: ";
for (auto& duration : all_durations[period.name])
for (auto& duration : all_durations[period.stage])
std::cout << " " << (duration.second - duration.first) / 1000;
std::cout << " Total " << sum / 1000 << std::endl;
}
}
info.push_back(get_profiling_interval(period.name, 0, sum));
info.push_back(get_profiling_interval(period.stage, 0, sum));
}
return true;
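The merging loop above accumulates, per stage, the union of each event's [start, end) span so that overlapping kernels are not counted twice. A standalone sketch of the same computation done as a sort-and-sweep rather than in-place merging (illustrative, not the plugin's code):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Total coverage of a set of possibly overlapping [start, end) spans.
uint64_t union_length(std::vector<std::pair<uint64_t, uint64_t>> spans) {
    std::sort(spans.begin(), spans.end());
    uint64_t total = 0, run_start = 0, run_end = 0;
    bool open = false;
    for (const auto& s : spans) {
        if (!open || s.first > run_end) {       // disjoint: close the previous run
            if (open)
                total += run_end - run_start;
            run_start = s.first;
            run_end = s.second;
            open = true;
        } else {                                 // overlapping: extend the current run
            run_end = std::max(run_end, s.second);
        }
    }
    if (open)
        total += run_end - run_start;
    return total;
}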

View File

@@ -23,7 +23,7 @@ bool ocl_user_event::get_profiling_info_impl(std::list<cldnn::instrumentation::p
}
auto period = std::make_shared<instrumentation::profiling_period_basic>(_duration->value());
info.push_back({"duration", period });
info.push_back({ instrumentation::profiling_stage::executing, period });
return true;
}

View File

@@ -115,15 +115,16 @@ protected:
auto profiling = engine.configuration().enable_profiling;
event::ptr event;
if (profiling)
if (profiling) {
stream.finish();
event = stream.create_user_event(false);
}
_prim.execute(stream.get_onednn_stream(), _args);
if (profiling) {
// Measure all previously queued tasks
event = stream.enqueue_marker({}, true);
stream.wait_for_events({event});
stream.finish();
event->set();
} else {
// Create and set user event as complete
event = stream.create_user_event(true);
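The oneDNN path now brackets the primitive with explicit stream.finish() calls and completes a user event by hand, presumably so the span between creating and setting that event covers only this primitive rather than everything queued earlier. A hedged sketch of the pattern with simplified stand-in stream/event types (not the plugin's classes):

#include <memory>

struct user_event {                              // stand-in for the plugin's event::ptr target
    explicit user_event(bool set) : completed(set) {}
    void set() { completed = true; }
    bool completed;
};

struct stream_t {                                // stand-in for the plugin's stream
    void finish() { /* block until all queued work has finished */ }
    std::shared_ptr<user_event> create_user_event(bool set) { return std::make_shared<user_event>(set); }
};

template <typename Primitive, typename Args>
std::shared_ptr<user_event> execute_profiled(stream_t& stream, Primitive& prim, const Args& args, bool profiling) {
    std::shared_ptr<user_event> event;
    if (profiling) {
        stream.finish();                         // flush earlier queued work before creating the event
        event = stream.create_user_event(false); // event starts out incomplete
    }
    prim.execute(args);
    if (profiling) {
        stream.finish();                         // wait for this primitive alone
        event->set();                            // mark the event complete once the primitive has finished
    } else {
        event = stream.create_user_event(true);  // already complete, nothing to measure
    }
    return event;
}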

View File

@@ -215,7 +215,7 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
if (event.get() != NULL) {
auto profiling_intervals = event->get_profiling_info();
for (auto const& profiling_interval : profiling_intervals) {
if (profiling_interval.name == "executing") {
if (profiling_interval.stage == instrumentation::profiling_stage::executing) {
kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time);
num_of_runs++;
break;
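kernel_runner appears to keep the smallest executing interval it observes across repeated runs of a kernel. A self-contained sketch of that selection over already-extracted (stage, span) pairs (illustrative types, not the plugin's event API):

#include <algorithm>
#include <chrono>
#include <utility>
#include <vector>

enum class profiling_stage { submission, starting, executing, duration };

using interval = std::pair<profiling_stage, std::chrono::nanoseconds>;

// Smallest "executing" span across repeated runs of the same kernel.
std::chrono::nanoseconds best_executing_time(const std::vector<std::vector<interval>>& runs) {
    auto best = std::chrono::nanoseconds::max();
    for (const auto& run : runs)
        for (const auto& iv : run)
            if (iv.first == profiling_stage::executing)
                best = std::min(best, iv.second);
    return best;
}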