[GPU] Improve OV_GPU_DumpLayers debug configuration (#15719)

Co-authored-by: Kim,SungEun <sungeun.kim@intel.com>
This commit is contained in:
Dohyun Kim (Felix) 2023-02-19 23:57:19 +09:00 committed by GitHub
parent 1d5839fb92
commit b7bcef6864
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 38 deletions

View File

@@ -93,6 +93,7 @@ public:
int dump_layers_dst_only; // Dump only output of layers
int dump_layers_result; // Dump result layers
int dump_layers_limit_batch; // Limit the size of batch to dump
int dump_layers_raw; // Dump raw data.
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
std::vector<std::string> after_proc; // Start inference after the listed processes
int serialize_compile; // Serialize creating primitives and compiling kernels

View File

@@ -146,7 +146,7 @@ size_t get_x_pitch(const layout& layout) {
}
template <class T>
void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
auto&& size = mem->get_layout().get_tensor();
GPU_DEBUG_GET_INSTANCE(debug_config);
@@ -155,11 +155,15 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
tmp_size.batch[0] = batch_size;
if (tmp_size == size) {
file_stream << "shape: " << size.to_string() << " ";
file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl;
file_stream << "(count: " << size.count()
<< ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")"
<< (dump_raw ? " raw data" : "") << std::endl;
} else {
file_stream << "shape: " << tmp_size.to_string() << " ";
file_stream << "(count: " << tmp_size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format)
<< ", original shape: " << size.to_string() << ")" << std::endl;
file_stream << "(count: " << tmp_size.count()
<< ", original format: " << cldnn::fmt_to_str(mem->get_layout().format)
<< ", original shape: " << size.to_string() << ")"
<< (dump_raw ? " raw data" : "") << std::endl;
}
if (size.count() == 0) {
@@ -172,6 +176,7 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
auto x_pitch = get_x_pitch(mem->get_layout());
std::stringstream buffer;
if (!dump_raw) {
for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
for (cldnn::tensor::value_type b = 0; b < batch_size; ++b) {
for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) {
@@ -190,11 +195,16 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
}
}
}
} else {
for (size_t i = 0; i < lock.size(); ++i) {
buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[i]) << std::endl;
}
}
file_stream << buffer.str();
}
template <>
void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) {
auto&& l = mem->get_layout();
file_stream << "shape: ";
@@ -207,6 +217,7 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
mem_lock<uint32_t, mem_lock_type::read> lock(mem, stream);
auto mem_ptr = lock.data();
if (!dump_raw) {
for (cldnn::tensor::value_type b = 0; b < l.batch(); ++b) {
for (cldnn::tensor::value_type f = 0; f < (cldnn::tensor::value_type)ceil_div(l.feature(), 32); ++f) {
for (cldnn::tensor::value_type z = 0; z < l.spatial(2); ++z) {
@@ -220,10 +231,15 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
}
}
}
} else {
for (size_t i = 0; i < lock.size(); ++i) {
file_stream << std::fixed << std::setprecision(6) << mem_ptr[i] << std::endl;
}
}
}
void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
std::cout << "Dump " << layerName << std::endl;
void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName, bool dump_raw) {
std::cout << "Dump " << (dump_raw ? "raw " : "") << layerName << std::endl;
GPU_DEBUG_GET_INSTANCE(debug_config);
std::string filename = layerName;
std::replace(filename.begin(), filename.end(), '\\', '_');
@@ -239,17 +255,17 @@ void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName)
auto mem_dt = mem->get_layout().data_type;
if (mem_dt == cldnn::data_types::f32)
dump<float>(mem, stream, file_stream);
dump<float>(mem, stream, file_stream, dump_raw);
else if (mem_dt == cldnn::data_types::f16)
dump<half_t>(mem, stream, file_stream);
dump<half_t>(mem, stream, file_stream, dump_raw);
else if (mem_dt == cldnn::data_types::bin)
dump<uint32_t>(mem, stream, file_stream);
dump<uint32_t>(mem, stream, file_stream, dump_raw);
else if (mem_dt == cldnn::data_types::i32)
dump<int32_t>(mem, stream, file_stream);
dump<int32_t>(mem, stream, file_stream, dump_raw);
else if (mem_dt == cldnn::data_types::i8)
dump<int8_t>(mem, stream, file_stream);
dump<int8_t>(mem, stream, file_stream, dump_raw);
else if (mem_dt == cldnn::data_types::u8)
dump<uint8_t>(mem, stream, file_stream);
dump<uint8_t>(mem, stream, file_stream, dump_raw);
}
void wait_for_the_turn() {
@@ -272,7 +288,7 @@ void wait_for_the_turn() {
#else
void dump_perf_data_raw(std::string, const std::list<std::shared_ptr<primitive_inst>>&) {}
void log_memory_to_file(memory::ptr, stream&, std::string) {}
void log_memory_to_file(memory::ptr, stream&, std::string, bool dump_raw) {}
void wait_for_the_turn() {}
#endif
} // namespace
@@ -988,11 +1004,14 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
std::cerr << inst->id() << std::endl;
}
GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
debug_config->is_dumped_layer(layer_name)) {
GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 && debug_config->is_dumped_layer(layer_name)) {
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
layer_name + "_src_" + std::to_string(i));
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i),
get_stream(),
"program" + std::to_string(get_program()->get_id()) +
"_network" + std::to_string(get_id()) +
"_" + layer_name + "_src" + std::to_string(i),
debug_config->dump_layers_raw);
}
}
}
@@ -1004,8 +1023,12 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
const std::string layer_name = inst->id();
GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name, inst->is_output())) {
for (size_t i = 0; i < get_primitive(inst->id())->outputs_memory_count(); i++) {
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(i), get_stream(),
layer_name + "_dst_" + std::to_string(i));
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(i),
get_stream(),
"program" + std::to_string(get_program()->get_id()) +
"_network" + std::to_string(get_id()) +
"_" + layer_name + "_dst" + std::to_string(i),
debug_config->dump_layers_raw);
}
}
}

View File

@@ -117,6 +117,7 @@ static void print_help_messages() {
message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only");
message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers");
message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format.");
message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation");
message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space."
@@ -156,6 +157,7 @@ debug_configuration::debug_configuration()
, dump_layers_dst_only(0)
, dump_layers_result(0)
, dump_layers_limit_batch(std::numeric_limits<int>::max())
, dump_layers_raw(0)
, base_batch_for_memory_estimation(-1)
, serialize_compile(0)
, max_kernels_per_batch(0) {
@@ -168,6 +170,7 @@ debug_configuration::debug_configuration()
get_gpu_debug_env_var("DumpSources", dump_sources);
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw);
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
get_gpu_debug_env_var("DisableOnednn", disable_onednn);