[GPU] Debug config improvement (#6575)
This commit is contained in:
parent
98148539b3
commit
0d775269ea
@ -24,8 +24,13 @@ private:
|
||||
debug_configuration();
|
||||
public:
|
||||
static const char *prefix;
|
||||
int verbose;
|
||||
std::string dump_graphs;
|
||||
int verbose; // Verbose execution
|
||||
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
|
||||
int disable_usm; // Disable usm usage
|
||||
std::string dump_graphs; // Dump optimized graph
|
||||
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
|
||||
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
|
||||
int dump_layers_dst_only; // Dump only output of layers
|
||||
static const debug_configuration *get_instance();
|
||||
};
|
||||
|
||||
|
@ -35,10 +35,26 @@ static void get_str_env(const std::string &var, std::string &val) {
|
||||
|
||||
// Constructs the debug configuration.
//
// Every option starts out disabled (0) or empty. When GPU_DEBUG_CONFIG is
// defined, each option is then passed to get_int_env / get_str_env together
// with its "OV_GPU_*" name (helpers defined elsewhere in this file; presumably
// they overwrite the value from the environment variable of that name).
//
// The initializer list follows the member declaration order and initializes
// each member exactly once.
debug_configuration::debug_configuration()
        : verbose(0)
        , print_multi_kernel_perf(0)
        , disable_usm(0)
        , dump_graphs(std::string())
        , dump_layers_path(std::string())
        , dump_layers(std::string())
        , dump_layers_dst_only(0) {
#ifdef GPU_DEBUG_CONFIG
    get_int_env("OV_GPU_Verbose", verbose);
    get_int_env("OV_GPU_PrintMultiKernelPerf", print_multi_kernel_perf);
    get_int_env("OV_GPU_DisableUsm", disable_usm);
    get_str_env("OV_GPU_DumpGraphs", dump_graphs);
    get_str_env("OV_GPU_DumpLayersPath", dump_layers_path);
    get_str_env("OV_GPU_DumpLayers", dump_layers);
    get_int_env("OV_GPU_DumpLayersDstOnly", dump_layers_dst_only);

    // Requesting a layer dump path forces USM off; tell the user when this
    // overrides their (unset or zero) DisableUsm setting.
    if (dump_layers_path.length() > 0 && !disable_usm) {
        disable_usm = 1;
        GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
    }

    // Pad the space-separated layer list with a leading and trailing space so
    // that a layer can later be matched by searching for " <name> ".
    if (dump_layers.length() > 0)
        dump_layers = " " + dump_layers + " ";  // Insert delimiter for easier parsing when used
#endif
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "cldnn/runtime/memory.hpp"
|
||||
#include "cldnn/runtime/stream.hpp"
|
||||
#include "cldnn/runtime/device_query.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
#include "ocl/ocl_engine_factory.hpp"
|
||||
|
||||
@ -32,6 +33,10 @@ const device::ptr engine::get_device() const {
|
||||
}
|
||||
|
||||
bool engine::use_unified_shared_memory() const {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->disable_usm) {
|
||||
return false;
|
||||
}
|
||||
if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) {
|
||||
return true;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "ocl_event.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
@ -175,6 +176,17 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
|
||||
for (auto& duration : all_durations[period.name]) {
|
||||
sum += (duration.second - duration.first);
|
||||
}
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->print_multi_kernel_perf) {
|
||||
if (0 == strcmp(period.name, "executing")) {
|
||||
GPU_DEBUG_COUT << "Multi-kernel time: ";
|
||||
for (auto& duration : all_durations[period.name])
|
||||
std::cout << " " << (duration.second - duration.first) / 1000;
|
||||
std::cout << " Total " << sum / 1000 << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
info.push_back(get_profiling_interval(period.name, 0, sum));
|
||||
}
|
||||
|
||||
|
@ -33,15 +33,9 @@
|
||||
#include <utility>
|
||||
#include <map>
|
||||
|
||||
// #define DEBUG_DUMP_PATH "cldnn_dump/"
|
||||
|
||||
#ifdef DEBUG_DUMP_PATH
|
||||
#ifdef GPU_DEBUG_CONFIG
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
|
||||
#define DUMP_VERBOSE 0
|
||||
#define DUMP_SINGLE_LAYER 0
|
||||
#define DUMP_LAYER_NAME ""
|
||||
#endif
|
||||
|
||||
namespace cldnn {
|
||||
@ -131,7 +125,7 @@ std::map<primitive_id, network_output> network::execute(const std::vector<event:
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_DUMP_PATH
|
||||
#ifdef GPU_DEBUG_CONFIG
|
||||
static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) {
|
||||
#if defined HALF_HALF_HPP
|
||||
return val;
|
||||
@ -180,6 +174,19 @@ float convert_element(float f) { return f; }
|
||||
|
||||
// Overload for half-precision values: converts to float through
// convert_half_to_float, leaving its flush_denorm_to_zero argument at the default.
float convert_element(half_t h) { return convert_half_to_float(h); }
|
||||
|
||||
static size_t get_x_pitch(const layout& layout) {
|
||||
try {
|
||||
auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0));
|
||||
auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0));
|
||||
auto x0 = layout.get_linear_offset(tensor_x0);
|
||||
auto x1 = layout.get_linear_offset(tensor_x1);
|
||||
return (x1 - x0);
|
||||
} catch (...) {
|
||||
// When spatial size of x=0, x_pitch is meaningless
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
auto&& size = mem->get_layout().size;
|
||||
@ -189,6 +196,8 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
|
||||
mem_lock<T> lock(mem, stream);
|
||||
auto mem_ptr = lock.data();
|
||||
auto x_pitch = get_x_pitch(mem->get_layout());
|
||||
std::stringstream buffer;
|
||||
|
||||
for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
|
||||
for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
|
||||
@ -196,10 +205,11 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) {
|
||||
for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {
|
||||
for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) {
|
||||
for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) {
|
||||
cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, w));
|
||||
cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(0, y, z, w));
|
||||
size_t input_it = mem->get_layout().get_linear_offset(t);
|
||||
file_stream << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl;
|
||||
|
||||
for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x, input_it += x_pitch) {
|
||||
buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -207,6 +217,7 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
}
|
||||
}
|
||||
}
|
||||
file_stream << buffer.str();
|
||||
}
|
||||
template <>
|
||||
void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
@ -238,12 +249,13 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
|
||||
}
|
||||
|
||||
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
std::string filename = layerName;
|
||||
std::replace(filename.begin(), filename.end(), '\\', '_');
|
||||
std::replace(filename.begin(), filename.end(), '/', '_');
|
||||
std::replace(filename.begin(), filename.end(), ' ', '_');
|
||||
std::replace(filename.begin(), filename.end(), ':', '_');
|
||||
filename = DEBUG_DUMP_PATH + filename + ".txt";
|
||||
filename = debug_config->dump_layers_path + filename + ".txt";
|
||||
|
||||
std::ofstream file_stream(filename);
|
||||
auto mem_dt = mem->get_layout().data_type;
|
||||
@ -260,6 +272,12 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string laye
|
||||
else if (mem_dt == cldnn::data_types::u8)
|
||||
dump<uint8_t>(mem, stream, file_stream);
|
||||
}
|
||||
#else
|
||||
// No-op variant of log_memory_to_file, compiled in the #else branch of the
// debug-dump section. It keeps the same signature so call sites build
// unchanged, but writes nothing.
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
    // The casts only mark the parameters as intentionally unused.
    static_cast<void>(mem);
    static_cast<void>(stream);
    static_cast<void>(layerName);
}
|
||||
#endif
|
||||
/*
|
||||
Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
|
||||
@ -487,25 +505,24 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
|
||||
set_arguments();
|
||||
|
||||
for (auto& inst : _exec_order) {
|
||||
#ifdef DEBUG_DUMP_PATH
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||
auto& node = _program->get_node(inst->id());
|
||||
|
||||
std::string layer_name = node.id();
|
||||
#if DUMP_VERBOSE
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
std::cerr << get_primitive_info(inst->id()) << std::endl;
|
||||
#endif
|
||||
#if DUMP_SINGLE_LAYER
|
||||
if (layer_name == DUMP_LAYER_NAME) {
|
||||
#endif
|
||||
std::cerr << "Dump " << layer_name << " layer" << std::endl;
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
|
||||
(debug_config->dump_layers.length() == 0 ||
|
||||
(debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos))) {
|
||||
std::cout << "Dump " << layer_name << " layer src" << std::endl;
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
||||
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
|
||||
layer_name + "_src_" + std::to_string(i));
|
||||
}
|
||||
#if DUMP_SINGLE_LAYER
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||
GPU_DEBUG_COUT << "Execute " << inst->id() << std::endl;
|
||||
}
|
||||
@ -517,16 +534,16 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
|
||||
}
|
||||
execute_primitive(inst, events);
|
||||
|
||||
#ifdef DEBUG_DUMP_PATH
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||
get_stream().finish();
|
||||
#if DUMP_SINGLE_LAYER
|
||||
if (layer_name == DUMP_LAYER_NAME)
|
||||
#endif
|
||||
{
|
||||
auto& node = _program->get_node(inst->id());
|
||||
std::string layer_name = node.id();
|
||||
GPU_DEBUG_IF(debug_config->dump_layers.length() == 0 ||
|
||||
(debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos)) {
|
||||
std::cout << "Dump " << layer_name << " layer dst" << std::endl;
|
||||
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& inst : _program->get_processing_order()) {
|
||||
|
20
inference-engine/thirdparty/clDNN/tests/test_cases/debug_config_gpu_test.cpp
vendored
Normal file
20
inference-engine/thirdparty/clDNN/tests/test_cases/debug_config_gpu_test.cpp
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "test_utils/test_utils.h"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
// Checks that the GPU debug macros are switched off in release builds: when
// NDEBUG is defined, the body guarded by GPU_DEBUG_IF must not execute even
// though its condition is always true. Outside NDEBUG the test body is empty.
TEST(debug_config_test, check_debug_config_off_on_release) {
#ifdef NDEBUG
    GPU_DEBUG_GET_INSTANCE(debug_config);
    GPU_DEBUG_IF(1) {
        // Reaching this line means debug code is still active in a release build.
        GTEST_FAIL(); /* This should be disabled in case of release build */
    }
#endif
}
|
Loading…
Reference in New Issue
Block a user