[GPU] Improvement for buffer dump (#18542)
* [GPU] Improvement for buffer dump + added OV_GPU_DumpLayersInput to support dump input layers + added OV_GPU_DumpLayersRawBinary to make binary dump + added OV_GPU_LoadDumpRawBinary to use binary dump as input + binary dump naming rule layername_datatype_tensor_format.bin Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
parent
c145d8f1e9
commit
83a78eb559
@ -337,6 +337,7 @@ std::vector<uint8_t> load_binary(const std::string& path);
|
||||
* @param path - binary file path to store
|
||||
*/
|
||||
void save_binary(const std::string& path, std::vector<uint8_t> binary);
|
||||
/**
 * @brief save raw bytes to a binary file
 * @param path - binary file path to store
 * @param binary - pointer to the first byte to write
 * @param bin_size - number of bytes to write starting at `binary`
 */
void save_binary(const std::string& path, const char* binary, size_t bin_size);
|
||||
|
||||
/**
|
||||
* @brief Trim OpenVINO project file name path if OpenVINO project directory found.
|
||||
|
@ -621,6 +621,11 @@ std::vector<uint8_t> ov::util::load_binary(const std::string& path) {
|
||||
}
|
||||
|
||||
void ov::util::save_binary(const std::string& path, std::vector<uint8_t> binary) {
|
||||
save_binary(path, reinterpret_cast<const char*>(&binary[0]), binary.size());
|
||||
return;
|
||||
}
|
||||
|
||||
void ov::util::save_binary(const std::string& path, const char* binary, size_t bin_size) {
|
||||
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
|
||||
std::wstring widefilename = ov::util::string_to_wstring(path);
|
||||
const wchar_t* filename = widefilename.c_str();
|
||||
@ -629,7 +634,7 @@ void ov::util::save_binary(const std::string& path, std::vector<uint8_t> binary)
|
||||
#endif
|
||||
std::ofstream out_file(filename, std::ios::out | std::ios::binary);
|
||||
if (out_file.is_open()) {
|
||||
out_file.write(reinterpret_cast<const char*>(&binary[0]), binary.size());
|
||||
out_file.write(binary, bin_size);
|
||||
} else {
|
||||
throw std::runtime_error("Could not save binary to " + path);
|
||||
}
|
||||
|
@ -94,36 +94,42 @@ private:
|
||||
|
||||
public:
|
||||
static const char *prefix;
|
||||
int help; // Print help messages
|
||||
int verbose; // Verbose execution
|
||||
int verbose_color; // Print verbose color
|
||||
int list_layers; // Print list layers
|
||||
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
|
||||
int disable_usm; // Disable usm usage
|
||||
int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU)
|
||||
int disable_onednn_opt_post_ops; // Disable onednn optimize post operators
|
||||
std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir
|
||||
std::string dump_graphs; // Dump optimized graph
|
||||
std::string dump_sources; // Dump opencl sources
|
||||
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
|
||||
std::vector<std::string> dump_layers; // Dump intermediate buffers of specified layers only
|
||||
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
|
||||
int dump_layers_dst_only; // Dump only output of layers
|
||||
int dump_layers_result; // Dump result layers
|
||||
int dump_layers_limit_batch; // Limit the size of batch to dump
|
||||
int dump_layers_raw; // Dump raw data.
|
||||
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
|
||||
std::vector<std::string> after_proc; // Start inference after the listed processes
|
||||
int serialize_compile; // Serialize creating primitives and compiling kernels
|
||||
std::vector<std::string> forced_impl_types; // Force implementation type either ocl or onednn
|
||||
int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels
|
||||
int disable_async_compilation; // Disable async compilation
|
||||
int disable_dynamic_impl; // Disable dynamic implementation
|
||||
int disable_runtime_buffer_fusing; // Disable runtime buffer fusing
|
||||
std::set<int64_t> dump_iteration; // Dump n-th execution of network.
|
||||
int help; // Print help messages
|
||||
int verbose; // Verbose execution
|
||||
int verbose_color; // Print verbose color
|
||||
int list_layers; // Print list layers
|
||||
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
|
||||
int disable_usm; // Disable usm usage
|
||||
int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU)
|
||||
int disable_onednn_opt_post_ops; // Disable onednn optimize post operators
|
||||
std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir
|
||||
std::string dump_graphs; // Dump optimized graph
|
||||
std::string dump_sources; // Dump opencl sources
|
||||
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
|
||||
std::vector<std::string> dump_layers; // Dump intermediate buffers of specified layers only
|
||||
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
|
||||
int dump_layers_dst_only; // Dump only output of layers
|
||||
int dump_layers_result; // Dump result layers
|
||||
int dump_layers_input; // Dump input layers
|
||||
int dump_layers_limit_batch; // Limit the size of batch to dump
|
||||
int dump_layers_raw; // Dump raw data.
|
||||
int dump_layers_binary; // Dump binary data.
|
||||
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
|
||||
std::vector<std::string> after_proc; // Start inference after the listed processes
|
||||
int serialize_compile; // Serialize creating primitives and compiling kernels
|
||||
std::vector<std::string> forced_impl_types; // Force implementation type either ocl or onednn
|
||||
int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels
|
||||
int disable_async_compilation; // Disable async compilation
|
||||
int disable_dynamic_impl; // Disable dynamic implementation
|
||||
int disable_runtime_buffer_fusing; // Disable runtime buffer fusing
|
||||
std::set<int64_t> dump_iteration; // Dump n-th execution of network.
|
||||
std::vector<std::string> load_layers_raw_dump; // List of layers to load dumped raw binary and filenames
|
||||
static const debug_configuration *get_instance();
|
||||
bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;
|
||||
// Returns the binary dump file names listed in load_layers_raw_dump for the layer whose name equals `id`
// (entries have the form "<layer>:<file>[,<file>...]"); empty if no entry matches.
std::vector<std::string> get_filenames_for_matched_layer_loading_binaries(const std::string& id) const;
|
||||
// Returns `file_name` with '\\', '/', ' ' and ':' replaced by '_' so it can be used as a dump file name
std::string get_name_for_dump(const std::string& file_name) const;
|
||||
bool is_layer_for_dumping(const std::string& layerName, bool is_output = false, bool is_input = false) const;
|
||||
bool is_target_iteration(int64_t iteration) const;
|
||||
// Returns the first entry of `file_names` that contains `pattern` as a substring, or an empty string if none does
std::string get_matched_from_filelist(const std::vector<std::string>& file_names, std::string pattern) const;
|
||||
|
||||
struct memory_preallocation_params {
|
||||
bool is_initialized = false;
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/util/file_util.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/data.hpp"
|
||||
#include "intel_gpu/primitives/mutable_data.hpp"
|
||||
#include "intel_gpu/primitives/input_layout.hpp"
|
||||
@ -247,11 +249,7 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream,
|
||||
void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName, bool dump_raw) {
|
||||
std::cout << "Dump " << (dump_raw ? "raw " : "") << layerName << std::endl;
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
std::string filename = layerName;
|
||||
std::replace(filename.begin(), filename.end(), '\\', '_');
|
||||
std::replace(filename.begin(), filename.end(), '/', '_');
|
||||
std::replace(filename.begin(), filename.end(), ' ', '_');
|
||||
std::replace(filename.begin(), filename.end(), ':', '_');
|
||||
std::string filename = debug_config->get_name_for_dump(layerName);
|
||||
filename = debug_config->dump_layers_path + filename + ".txt";
|
||||
std::ofstream file_stream(filename);
|
||||
if (!mem) {
|
||||
@ -306,6 +304,25 @@ static uint32_t get_unique_net_id() {
|
||||
return ++id_gen;
|
||||
}
|
||||
|
||||
// Builds the destination path of a raw binary dump for one tensor.
//
// The resulting name is
//   <dump_layers_path><name with path-unfriendly chars replaced>__<data type>_<_d0_d1...>__<format>.bin
// where every dimension is prefixed with '_' (so the data type is followed by
// two underscores). When GPU_DEBUG_CONFIG is not defined an empty string is
// returned.
static std::string get_file_path_for_binary_dump(cldnn::layout layout, std::string name) {
    const std::string type_name = data_type_traits::name(layout.data_type);
    const std::string format_name = layout.format.to_string();

    // Shape part of the file name: "_<dim>" repeated for each of the first get_rank() dimensions.
    std::string shape_suffix;
    const auto dims = layout.get_dims();
    const size_t rank = layout.get_rank();
    for (size_t idx = 0; idx < rank; ++idx) {
        shape_suffix += "_";
        shape_suffix += to_string(dims[idx]);
    }

    std::string file_path;
#ifdef GPU_DEBUG_CONFIG
    GPU_DEBUG_GET_INSTANCE(debug_config);
    const std::string dump_name = debug_config->get_name_for_dump(name);
    file_path = debug_config->dump_layers_path + dump_name
                + "__" + type_name + "_" + shape_suffix + "__" + format_name + ".bin";
#endif
    return file_path;
}
|
||||
|
||||
/*
|
||||
Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
|
||||
opt pass).
|
||||
@ -1219,6 +1236,67 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
};
|
||||
|
||||
for (auto& inst : _exec_order) {
|
||||
// Load binary dump for input layers
|
||||
GPU_DEBUG_IF(!debug_config->load_layers_raw_dump.empty()) {
|
||||
const std::string layer_name = inst->id();
|
||||
auto files = debug_config->get_filenames_for_matched_layer_loading_binaries(layer_name);
|
||||
if (!files.empty()) {
|
||||
if (inst->is_input()) {
|
||||
// Loading binary dumps for output tensors of input-layers : only one output exists or index(dstN) exists
|
||||
auto dump_file = debug_config->get_matched_from_filelist(files, "_dst0__");
|
||||
OPENVINO_ASSERT((files.size() == 1 || dump_file.length() != 0), "Unexpected binary dump for input layer");
|
||||
|
||||
OPENVINO_ASSERT(files.size() == get_primitive(inst->id())->outputs_memory_count(), "Mis-match dump file count");
|
||||
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->outputs_memory_count(); i++) {
|
||||
auto dump_file = files[0];
|
||||
if (files.size() > 1 || get_primitive(inst->id())->outputs_memory_count() != 1) {
|
||||
std::string pattern = "_dst" + std::to_string(i) + "__";
|
||||
dump_file = debug_config->get_matched_from_filelist(files, pattern);
|
||||
}
|
||||
OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_dst[N]__' for binary dump");
|
||||
GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for " << layer_name << std::endl;
|
||||
|
||||
std::vector<uint8_t> bin = ov::util::load_binary(dump_file);
|
||||
OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file);
|
||||
|
||||
auto output_mem = get_primitive(layer_name)->output_memory_ptr(i);
|
||||
OPENVINO_ASSERT(output_mem->size() == bin.size(), "memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name);
|
||||
|
||||
output_mem->copy_from(get_stream(), static_cast<void *>(&bin[0]), true);
|
||||
}
|
||||
} else {
|
||||
auto check_dst = debug_config->get_matched_from_filelist(files, "_dst0__");
|
||||
OPENVINO_ASSERT(check_dst.length() == 0, "Expected to load binaries for inputs of " + layer_name);
|
||||
|
||||
// Loading input tensors for any layer
|
||||
auto dump_file = debug_config->get_matched_from_filelist(files, "_src0__");
|
||||
OPENVINO_ASSERT(dump_file.length() != 0, "Could not find expected pattern '_src[N]__' for binary dump input : " + layer_name);
|
||||
|
||||
OPENVINO_ASSERT(files.size() == get_primitive(inst->id())->dependencies().size(), "Mis-match dump file count");
|
||||
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
||||
auto dump_file = files[0];
|
||||
if (files.size() > 1 || get_primitive(inst->id())->dependencies().size() != 1) {
|
||||
std::string pattern = "_src" + std::to_string(i) + "__";
|
||||
dump_file = debug_config->get_matched_from_filelist(files, pattern);
|
||||
}
|
||||
OPENVINO_ASSERT((dump_file.length() > 0), "Could not find expected pattern '_src[N]__' for binary dump input");
|
||||
GPU_DEBUG_COUT << " Load binary dump : " << dump_file << " for input of " << layer_name << std::endl;
|
||||
|
||||
std::vector<uint8_t> bin = ov::util::load_binary(dump_file);
|
||||
OPENVINO_ASSERT(!bin.empty(), "Failure loading binary from OV_GPU_LoadDumpRawBinary : " + dump_file);
|
||||
|
||||
auto input_mem = get_primitive(inst->id())->dep_memory_ptr(i);
|
||||
OPENVINO_ASSERT(input_mem->size() == bin.size(), "memory size mis-match for OV_GPU_LoadDumpRawBinary : " + layer_name);
|
||||
|
||||
input_mem->copy_from(get_stream(), static_cast<void *>(&bin[0]), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dump input buffers of 'inst'
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||
const std::string layer_name = inst->id();
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
@ -1226,36 +1304,74 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->is_target_iteration(curr_iter) &&
|
||||
debug_config->dump_layers_dst_only == 0 && debug_config->is_dumped_layer(layer_name)) {
|
||||
debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) {
|
||||
std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":";
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
||||
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i),
|
||||
get_stream(),
|
||||
"program" + std::to_string((get_program() != nullptr) ? get_program()->get_id() : 0) +
|
||||
"_network" + std::to_string(get_id()) +
|
||||
"_" + get_iteration_prefix(curr_iter) +
|
||||
layer_name + "_src" + std::to_string(i),
|
||||
debug_config->dump_layers_raw);
|
||||
std::string name = "program" + std::to_string((get_program() != nullptr) ? get_program()->get_id() : 0) +
|
||||
"_network" + std::to_string(get_id()) +
|
||||
"_" + get_iteration_prefix(curr_iter) +
|
||||
layer_name + "_src" + std::to_string(i);
|
||||
auto input_mem = get_primitive(inst->id())->dep_memory_ptr(i);
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_binary) {
|
||||
// Binary dump : raw
|
||||
auto input_layout = inst->get_input_layout(i);
|
||||
auto filename = get_file_path_for_binary_dump(input_layout, name);
|
||||
|
||||
mem_lock<char, mem_lock_type::read> lock(input_mem, get_stream());
|
||||
ov::util::save_binary(filename, lock.data(), input_mem->size());
|
||||
GPU_DEBUG_COUT << " Dump layer src : " << layer_name << " to " << filename << std::endl;
|
||||
debug_str_for_bin_load += (filename + ",");
|
||||
} else {
|
||||
log_memory_to_file(input_mem,
|
||||
get_stream(),
|
||||
name,
|
||||
debug_config->dump_layers_raw);
|
||||
}
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_binary && !inst->is_input()) {
|
||||
debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
|
||||
GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
execute_primitive(inst, events);
|
||||
|
||||
// Dump output buffers of 'inst'
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||
get_stream().finish();
|
||||
const std::string layer_name = inst->id();
|
||||
auto prog_id = ((get_program() != nullptr) ? get_program()->get_id() : 0);
|
||||
auto net_id = get_id();
|
||||
GPU_DEBUG_IF(debug_config->is_target_iteration(curr_iter) &&
|
||||
debug_config->is_dumped_layer(layer_name, inst->is_output())) {
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->outputs_memory_count(); i++) {
|
||||
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(i),
|
||||
get_stream(),
|
||||
"program" + std::to_string(prog_id) +
|
||||
"_network" + std::to_string(net_id) +
|
||||
"_" + get_iteration_prefix(curr_iter) +
|
||||
layer_name + "_dst" + std::to_string(i),
|
||||
debug_config->dump_layers_raw);
|
||||
debug_config->is_layer_for_dumping(layer_name, inst->is_output(), inst->is_input())) {
|
||||
std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\""
|
||||
+ layer_name + ":";
|
||||
for (size_t i = 0; i < get_primitive(layer_name)->outputs_memory_count(); i++) {
|
||||
std::string name = "program" + std::to_string(prog_id) +
|
||||
"_network" + std::to_string(net_id) +
|
||||
"_" + get_iteration_prefix(curr_iter) +
|
||||
layer_name + "_dst" + std::to_string(i);
|
||||
auto output_mem = get_primitive(layer_name)->output_memory_ptr(i);
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_binary) {
|
||||
// Binary dump : raw
|
||||
auto output_layout = inst->get_output_layout(i);
|
||||
auto filename = get_file_path_for_binary_dump(output_layout, name);
|
||||
|
||||
mem_lock<char, mem_lock_type::read> lock(output_mem, get_stream());
|
||||
ov::util::save_binary(filename, lock.data(), output_mem->size());
|
||||
GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl;
|
||||
debug_str_for_bin_load += (filename + ",");
|
||||
} else {
|
||||
// Text dump
|
||||
log_memory_to_file(output_mem, get_stream(), name, debug_config->dump_layers_raw);
|
||||
}
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->dump_layers_binary && inst->is_input()) {
|
||||
debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
|
||||
GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -119,15 +119,17 @@ static void print_help_messages() {
|
||||
message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space."
|
||||
" Support case-insensitive and regular expression. For example .*conv.*");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersInput", "Dump intermediate buffers of input layers only");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersRaw", "If true, dump data is stored in raw memory format.");
|
||||
message_list.emplace_back("OV_GPU_DumpLayersRawBinary", "If true, dump data is stored in binary format.");
|
||||
message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
|
||||
message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation");
|
||||
message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space."
|
||||
" Supported on only on linux.");
|
||||
message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels");
|
||||
message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layout_name]:[impl_type]"
|
||||
message_list.emplace_back("OV_GPU_ForceImplTypes", "Force implementation type of a target primitive or layer. [primitive or layer_name]:[impl_type]"
|
||||
" For example fc:onednn gemm:onednn reduce:ocl do:cpu"
|
||||
" For primitives fc, gemm, do, reduce, concat are supported. Separated by space.");
|
||||
message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels");
|
||||
@ -139,6 +141,11 @@ static void print_help_messages() {
|
||||
"the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), "
|
||||
"max per-dim allowed diff(int), unconditional buffers preallocation ratio(float). For example for disabling memory"
|
||||
"preallocation at all, you can use OV_GPU_MemPreallocationOptions='0 0 0 1.0'");
|
||||
message_list.emplace_back("OV_GPU_LoadDumpRawBinary",
|
||||
"Specified layers which are loading dumped binary files generated by OV_GPU_DumpLayersRawBinary debug-config."
|
||||
" Currently, other layers except input-layer('parameter' type) are loading binaries for only input."
|
||||
" Different input or output tensors are seperated by ','. Different layers are separated by space. For example, "
|
||||
" \"[input_layer_name1]:[binary_dumped_file1],[binary_dump_file2] [input_layer_name2]:[binary_dump_1],[binary_dump_2]\"");
|
||||
|
||||
auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(),
|
||||
[](std::pair<std::string, std::string>& a, std::pair<std::string, std::string>& b){
|
||||
@ -170,8 +177,10 @@ debug_configuration::debug_configuration()
|
||||
, dry_run_path(std::string())
|
||||
, dump_layers_dst_only(0)
|
||||
, dump_layers_result(0)
|
||||
, dump_layers_input(0)
|
||||
, dump_layers_limit_batch(std::numeric_limits<int>::max())
|
||||
, dump_layers_raw(0)
|
||||
, dump_layers_binary(0)
|
||||
, base_batch_for_memory_estimation(-1)
|
||||
, serialize_compile(0)
|
||||
, max_kernels_per_batch(0)
|
||||
@ -190,8 +199,10 @@ debug_configuration::debug_configuration()
|
||||
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
|
||||
get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
|
||||
get_gpu_debug_env_var("DumpLayersRaw", dump_layers_raw);
|
||||
get_gpu_debug_env_var("DumpLayersRawBinary", dump_layers_binary);
|
||||
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
|
||||
get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
|
||||
get_gpu_debug_env_var("DumpLayersInput", dump_layers_input);
|
||||
get_gpu_debug_env_var("DisableOnednn", disable_onednn);
|
||||
get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
|
||||
get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data);
|
||||
@ -212,6 +223,8 @@ debug_configuration::debug_configuration()
|
||||
get_gpu_debug_env_var("DumpIteration", dump_iteration_str);
|
||||
std::string mem_preallocation_params_str;
|
||||
get_gpu_debug_env_var("MemPreallocationOptions", mem_preallocation_params_str);
|
||||
std::string load_dump_raw_bin_str;
|
||||
get_gpu_debug_env_var("LoadDumpRawBinary", load_dump_raw_bin_str);
|
||||
|
||||
if (help > 0) {
|
||||
print_help_messages();
|
||||
@ -219,7 +232,8 @@ debug_configuration::debug_configuration()
|
||||
}
|
||||
|
||||
if (dump_layers_str.length() > 0) {
|
||||
dump_layers_str = " " + dump_layers_str + " "; // Insert delimiter for easier parsing when used
|
||||
// Insert delimiter for easier parsing when used
|
||||
dump_layers_str = " " + dump_layers_str + " ";
|
||||
std::stringstream ss(dump_layers_str);
|
||||
std::string layer;
|
||||
while (ss >> layer) {
|
||||
@ -228,7 +242,7 @@ debug_configuration::debug_configuration()
|
||||
}
|
||||
|
||||
if (forced_impl_types_str.length() > 0) {
|
||||
forced_impl_types_str = " " + forced_impl_types_str + " "; // Insert delimiter for easier parsing when used
|
||||
forced_impl_types_str = " " + forced_impl_types_str + " ";
|
||||
std::stringstream ss(forced_impl_types_str);
|
||||
std::string type;
|
||||
while (ss >> type) {
|
||||
@ -236,6 +250,16 @@ debug_configuration::debug_configuration()
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing for loading binary files
|
||||
if (load_dump_raw_bin_str.length() > 0) {
|
||||
load_dump_raw_bin_str = " " + load_dump_raw_bin_str + " ";
|
||||
std::stringstream ss(load_dump_raw_bin_str);
|
||||
std::string type;
|
||||
while (ss >> type) {
|
||||
load_layers_raw_dump.push_back(type);
|
||||
}
|
||||
}
|
||||
|
||||
if (dump_iteration_str.size() > 0) {
|
||||
dump_iteration_str = " " + dump_iteration_str + " ";
|
||||
std::istringstream ss(dump_iteration_str);
|
||||
@ -305,12 +329,77 @@ const debug_configuration *debug_configuration::get_instance() {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool debug_configuration::is_dumped_layer(const std::string& layer_name, bool is_output) const {
|
||||
// Looks up the binary dump files configured for layer `id`.
//
// Each element of load_layers_raw_dump is expected to look like
// "<layer name>:<file>[,<file>...]". The layer name is everything before the
// LAST ':' of the element (layer names themselves may contain ':'), and the
// remainder is split on ','. Only the first element whose layer name equals
// `id` is considered. Returns an empty vector when nothing matches or when
// GPU_DEBUG_CONFIG is not defined.
std::vector<std::string> debug_configuration::get_filenames_for_matched_layer_loading_binaries(const std::string& id) const {
    std::vector<std::string> matched_files;
#ifdef GPU_DEBUG_CONFIG
    for (const auto& entry : load_layers_raw_dump) {
        const size_t colon = entry.rfind(":");
        if (colon == std::string::npos)
            continue;  // no "<layer>:<files>" separator in this entry

        const std::string entry_layer = entry.substr(0, colon);
        if (!(id == entry_layer))
            continue;

        // Split the comma separated file list; an entry without ',' yields one name.
        const std::string file_list = entry.substr(colon + 1);
        size_t begin = 0;
        while (true) {
            const size_t comma = file_list.find(",", begin);
            const bool is_last = (comma == std::string::npos);
            if (is_last) {
                matched_files.push_back(file_list.substr(begin));
            } else {
                matched_files.push_back(file_list.substr(begin, comma - begin));
            }

            GPU_DEBUG_LOG << " Layer name loading raw dump : " << entry_layer << " / the dump file : "
                          << matched_files.back() << std::endl;

            if (is_last)
                break;
            begin = comma + 1;
        }

        return matched_files;
    }
#endif

    return matched_files;
}
|
||||
|
||||
// Returns the first name in `file_names` that contains `pattern` as a
// substring. Yields an empty string when no name matches or when
// GPU_DEBUG_CONFIG is not defined.
std::string debug_configuration::get_matched_from_filelist(const std::vector<std::string>& file_names, std::string pattern) const {
#ifdef GPU_DEBUG_CONFIG
    const auto hit = std::find_if(file_names.begin(), file_names.end(), [&pattern](const std::string& candidate) {
        return candidate.find(pattern) != std::string::npos;
    });
    if (hit != file_names.end())
        return *hit;
#endif
    return std::string();
}
|
||||
|
||||
// Produces a name usable as a dump file name: every '\\', '/', ' ' and ':'
// in `file_name` is replaced by '_'. When GPU_DEBUG_CONFIG is not defined the
// input is returned unchanged.
std::string debug_configuration::get_name_for_dump(const std::string& file_name) const {
    std::string dump_name(file_name);
#ifdef GPU_DEBUG_CONFIG
    for (char& ch : dump_name) {
        const bool needs_replacing = (ch == '\\') || (ch == '/') || (ch == ' ') || (ch == ':');
        if (needs_replacing)
            ch = '_';
    }
#endif
    return dump_name;
}
|
||||
|
||||
bool debug_configuration::is_layer_for_dumping(const std::string& layer_name, bool is_output, bool is_input) const {
|
||||
#ifdef GPU_DEBUG_CONFIG
|
||||
// Dump result layer
|
||||
if (is_output == true && dump_layers_result == 1 &&
|
||||
(layer_name.find("constant:") == std::string::npos))
|
||||
return true;
|
||||
if (dump_layers.empty() && dump_layers_result == 0)
|
||||
// Dump all layers
|
||||
if (dump_layers.empty() && dump_layers_result == 0 && dump_layers_input == 0)
|
||||
return true;
|
||||
|
||||
// Dump input layers
|
||||
size_t pos = layer_name.find(':');
|
||||
auto type = layer_name.substr(0, pos);
|
||||
if (is_input == true && type == "parameter" && dump_layers_input == 1)
|
||||
return true;
|
||||
|
||||
auto is_match = [](const std::string& layer_name, const std::string& pattern) -> bool {
|
||||
@ -328,6 +417,7 @@ bool debug_configuration::is_dumped_layer(const std::string& layer_name, bool is
|
||||
std::regex re(upper_pattern);
|
||||
return std::regex_match(upper_layer_name, re);
|
||||
};
|
||||
|
||||
auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){
|
||||
return is_match(layer_name, dl);
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user