[GPU] Debug config improvement (#6575)

This commit is contained in:
Mingyu Kim 2021-07-09 18:23:16 +09:00 committed by GitHub
parent 98148539b3
commit 0d775269ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 42 deletions

View File

@ -24,8 +24,13 @@ private:
debug_configuration(); debug_configuration();
public: public:
static const char *prefix; static const char *prefix;
int verbose; int verbose; // Verbose execution
std::string dump_graphs; int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
int disable_usm; // Disable usm usage
std::string dump_graphs; // Dump optimized graph
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
int dump_layers_dst_only; // Dump only output of layers
static const debug_configuration *get_instance(); static const debug_configuration *get_instance();
}; };

View File

@ -35,10 +35,26 @@ static void get_str_env(const std::string &var, std::string &val) {
debug_configuration::debug_configuration() debug_configuration::debug_configuration()
: verbose(0) : verbose(0)
, dump_graphs(std::string()) { , print_multi_kernel_perf(0)
, disable_usm(0)
, dump_graphs(std::string())
, dump_layers_path(std::string())
, dump_layers(std::string())
, dump_layers_dst_only(0) {
#ifdef GPU_DEBUG_CONFIG #ifdef GPU_DEBUG_CONFIG
get_int_env("OV_GPU_Verbose", verbose); get_int_env("OV_GPU_Verbose", verbose);
get_int_env("OV_GPU_PrintMultiKernelPerf", print_multi_kernel_perf);
get_int_env("OV_GPU_DisableUsm", disable_usm);
get_str_env("OV_GPU_DumpGraphs", dump_graphs); get_str_env("OV_GPU_DumpGraphs", dump_graphs);
get_str_env("OV_GPU_DumpLayersPath", dump_layers_path);
get_str_env("OV_GPU_DumpLayers", dump_layers);
get_int_env("OV_GPU_DumpLayersDstOnly", dump_layers_dst_only);
if (dump_layers_path.length() > 0 && !disable_usm) {
disable_usm = 1;
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
}
if (dump_layers.length() > 0)
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
#endif #endif
} }

View File

@ -7,6 +7,7 @@
#include "cldnn/runtime/memory.hpp" #include "cldnn/runtime/memory.hpp"
#include "cldnn/runtime/stream.hpp" #include "cldnn/runtime/stream.hpp"
#include "cldnn/runtime/device_query.hpp" #include "cldnn/runtime/device_query.hpp"
#include "cldnn/runtime/debug_configuration.hpp"
#include "ocl/ocl_engine_factory.hpp" #include "ocl/ocl_engine_factory.hpp"
@ -32,6 +33,10 @@ const device::ptr engine::get_device() const {
} }
bool engine::use_unified_shared_memory() const { bool engine::use_unified_shared_memory() const {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->disable_usm) {
return false;
}
if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) { if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) {
return true; return true;
} }

View File

@ -3,6 +3,7 @@
// //
#include "ocl_event.hpp" #include "ocl_event.hpp"
#include "cldnn/runtime/debug_configuration.hpp"
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
@ -175,6 +176,17 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
for (auto& duration : all_durations[period.name]) { for (auto& duration : all_durations[period.name]) {
sum += (duration.second - duration.first); sum += (duration.second - duration.first);
} }
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->print_multi_kernel_perf) {
if (0 == strcmp(period.name, "executing")) {
GPU_DEBUG_COUT << "Multi-kernel time: ";
for (auto& duration : all_durations[period.name])
std::cout << " " << (duration.second - duration.first) / 1000;
std::cout << " Total " << sum / 1000 << std::endl;
}
}
info.push_back(get_profiling_interval(period.name, 0, sum)); info.push_back(get_profiling_interval(period.name, 0, sum));
} }

View File

@ -33,15 +33,9 @@
#include <utility> #include <utility>
#include <map> #include <map>
// #define DEBUG_DUMP_PATH "cldnn_dump/" #ifdef GPU_DEBUG_CONFIG
#ifdef DEBUG_DUMP_PATH
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#define DUMP_VERBOSE 0
#define DUMP_SINGLE_LAYER 0
#define DUMP_LAYER_NAME ""
#endif #endif
namespace cldnn { namespace cldnn {
@ -131,7 +125,7 @@ std::map<primitive_id, network_output> network::execute(const std::vector<event:
return result; return result;
} }
#ifdef DEBUG_DUMP_PATH #ifdef GPU_DEBUG_CONFIG
static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) { static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) {
#if defined HALF_HALF_HPP #if defined HALF_HALF_HPP
return val; return val;
@ -180,6 +174,19 @@ float convert_element(float f) { return f; }
float convert_element(half_t h) { return convert_half_to_float(h); } float convert_element(half_t h) { return convert_half_to_float(h); }
static size_t get_x_pitch(const layout& layout) {
try {
auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0));
auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0));
auto x0 = layout.get_linear_offset(tensor_x0);
auto x1 = layout.get_linear_offset(tensor_x1);
return (x1 - x0);
} catch (...) {
// When spatial size of x=0, x_pitch is meaningless
return 0;
}
}
template <class T> template <class T>
static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) { static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
auto&& size = mem->get_layout().size; auto&& size = mem->get_layout().size;
@ -189,6 +196,8 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
mem_lock<T> lock(mem, stream); mem_lock<T> lock(mem, stream);
auto mem_ptr = lock.data(); auto mem_ptr = lock.data();
auto x_pitch = get_x_pitch(mem->get_layout());
std::stringstream buffer;
for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) { for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) { for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
@ -196,10 +205,11 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) { for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) {
for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) { for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {
for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) {
for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) { cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(0, y, z, w));
cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, w)); size_t input_it = mem->get_layout().get_linear_offset(t);
size_t input_it = mem->get_layout().get_linear_offset(t);
file_stream << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl; for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x, input_it += x_pitch) {
buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl;
} }
} }
} }
@ -207,6 +217,7 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
} }
} }
} }
file_stream << buffer.str();
} }
template <> template <>
void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) { void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
@ -238,12 +249,13 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
} }
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) { static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
GPU_DEBUG_GET_INSTANCE(debug_config);
std::string filename = layerName; std::string filename = layerName;
std::replace(filename.begin(), filename.end(), '\\', '_'); std::replace(filename.begin(), filename.end(), '\\', '_');
std::replace(filename.begin(), filename.end(), '/', '_'); std::replace(filename.begin(), filename.end(), '/', '_');
std::replace(filename.begin(), filename.end(), ' ', '_'); std::replace(filename.begin(), filename.end(), ' ', '_');
std::replace(filename.begin(), filename.end(), ':', '_'); std::replace(filename.begin(), filename.end(), ':', '_');
filename = DEBUG_DUMP_PATH + filename + ".txt"; filename = debug_config->dump_layers_path + filename + ".txt";
std::ofstream file_stream(filename); std::ofstream file_stream(filename);
auto mem_dt = mem->get_layout().data_type; auto mem_dt = mem->get_layout().data_type;
@ -260,6 +272,12 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string laye
else if (mem_dt == cldnn::data_types::u8) else if (mem_dt == cldnn::data_types::u8)
dump<uint8_t>(mem, stream, file_stream); dump<uint8_t>(mem, stream, file_stream);
} }
#else
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
(void)mem;
(void)stream;
(void)layerName;
}
#endif #endif
/* /*
Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
@ -487,25 +505,24 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
set_arguments(); set_arguments();
for (auto& inst : _exec_order) { for (auto& inst : _exec_order) {
#ifdef DEBUG_DUMP_PATH GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
auto& node = _program->get_node(inst->id()); auto& node = _program->get_node(inst->id());
std::string layer_name = node.id();
std::string layer_name = node.id(); GPU_DEBUG_IF(debug_config->verbose >= 2) {
#if DUMP_VERBOSE std::cerr << get_primitive_info(inst->id()) << std::endl;
std::cerr << get_primitive_info(inst->id()) << std::endl; }
#endif
#if DUMP_SINGLE_LAYER GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
if (layer_name == DUMP_LAYER_NAME) { (debug_config->dump_layers.length() == 0 ||
#endif (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos))) {
std::cerr << "Dump " << layer_name << " layer" << std::endl; std::cout << "Dump " << layer_name << " layer src" << std::endl;
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) { for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(), log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
layer_name + "_src_" + std::to_string(i)); layer_name + "_src_" + std::to_string(i));
}
} }
#if DUMP_SINGLE_LAYER
} }
#endif
#endif
GPU_DEBUG_IF(debug_config->verbose >= 1) { GPU_DEBUG_IF(debug_config->verbose >= 1) {
GPU_DEBUG_COUT << "Execute " << inst->id() << std::endl; GPU_DEBUG_COUT << "Execute " << inst->id() << std::endl;
} }
@ -517,16 +534,16 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
} }
execute_primitive(inst, events); execute_primitive(inst, events);
#ifdef DEBUG_DUMP_PATH GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
get_stream().finish(); get_stream().finish();
#if DUMP_SINGLE_LAYER auto& node = _program->get_node(inst->id());
if (layer_name == DUMP_LAYER_NAME) std::string layer_name = node.id();
#endif GPU_DEBUG_IF(debug_config->dump_layers.length() == 0 ||
{ (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos)) {
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0"); std::cout << "Dump " << layer_name << " layer dst" << std::endl;
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
}
} }
#endif
} }
for (auto& inst : _program->get_processing_order()) { for (auto& inst : _program->get_processing_order()) {

View File

@ -0,0 +1,20 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "test_utils/test_utils.h"
#include "cldnn/runtime/debug_configuration.hpp"
using namespace cldnn;
using namespace ::tests;
TEST(debug_config_test, check_debug_config_off_on_release) {
#ifdef NDEBUG
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(1) {
GTEST_FAIL(); /* This should be disabled in case of release build */
}
#endif
}