[GPU] dump graphs during execution (#21411)

* dump graphs during execution
Author: Sungeun Kim
Date: 2023-12-12 13:16:02 +09:00 (committed via GitHub)
Parent: 889f2f54a2
Commit: 0b49bb99d0
6 changed files with 35 additions and 30 deletions

View File

@@ -229,9 +229,7 @@ public:
     // Reverses connection - user becomes dependency.
     void remove_nodes(std::vector<program_node*>& to_remove);
-    void dump_program(const char* stage,
-                      bool with_full_info,
-                      std::function<bool(program_node const&)> const& filter = nullptr) const;
+    void dump_program(const char* stage, bool with_full_info) const;
     const primitives_info& get_primitives_info() const;
     data_types get_inference_precision(const program_node& node) const;

View File

@@ -4,6 +4,7 @@
 #include "intel_gpu/graph/program.hpp"
 #include "program_node.h"
+#include "primitive_inst.h"
 #include <fstream>
 #include <string>
@@ -12,6 +13,7 @@ std::string get_dir_path(const ExecutionConfig& config);
 void dump_graph_optimized(std::ofstream&, const program&);
 void dump_graph_processing_order(std::ofstream&, const program&);
-void dump_graph_init(std::ofstream&, const program&, std::function<bool(program_node const&)> const&);
-void dump_graph_info(std::ofstream&, const program&, std::function<bool(program_node const&)> const&);
+void dump_graph_init(std::ofstream&, const program&,
+                     std::function<std::shared_ptr<primitive_inst>(const primitive_id&)> get_primitive_inst = nullptr);
+void dump_graph_info(std::ofstream&, const program&);
 } // namespace cldnn
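
Note: the optional callback added here is what separates the compile-time dump from the new execution-time dump. program::dump_program calls dump_graph_init without it (static layout info only), while network::execute_impl passes a lambda that maps each node id to its primitive_inst so the shapes resolved for the current iteration can be printed. A minimal sketch of both invocation modes; `prog`, `net`, and the file names are hypothetical stand-ins for the real call sites:

    // Sketch only: `prog` is a cldnn::program and `net` a cldnn::network taken from
    // the surrounding plugin code; file names are illustrative.
    std::ofstream compile_dump("cldnn_program_1_stage.graph");
    cldnn::dump_graph_init(compile_dump, prog);        // no callback: static layouts only

    std::ofstream exec_dump("cldnn_program_exec_p01_n01_00000.graph");
    cldnn::dump_graph_init(exec_dump, prog,
                           [&](const cldnn::primitive_id& id) -> std::shared_ptr<cldnn::primitive_inst> {
                               return net.get_primitive(id);   // lets the writer add a per-node "shape:" line
                           });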

View File

@@ -37,6 +37,7 @@
 #include "program_helpers.h"
 #include "to_string_utils.h"
 #include "kernels_cache.hpp"
+#include "program_dump_graph.h"
 // TODO: Remove once we have an abstraction for kernels_cache
 #include "kernel_base.h"
@@ -1121,6 +1122,22 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
                                << data_shape_str.str() << std::endl;
     }
 
+    GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && debug_config->is_target_iteration(curr_iter)) {
+        auto get_fixed_str = [](int value, int length = 2) -> std::string {
+            std::ostringstream ss;
+            ss << std::setw(length) << std::setfill('0') << std::to_string(value);
+            return ss.str();
+        };
+        std::string path = get_dir_path(get_config());
+        if (!path.empty()) {
+            std::ofstream ofs(path + "cldnn_program_exec_p" + get_fixed_str(get_program()->get_id()) + "_n" + get_fixed_str(get_id())
+                                   + "_" + get_fixed_str(curr_iter, 5) + ".graph");
+            dump_graph_init(ofs, *get_program(), [&](const primitive_id& id) -> std::shared_ptr<primitive_inst> {
+                return get_primitive(id);
+            });
+        }
+    }
+
     // Store events only in case of OOO queue or enabled Profiling
     auto store_events = is_out_of_order_queue || _enable_profiling;
     if (store_events) {
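
The get_fixed_str helper only zero-pads the numeric ids so the per-iteration dump files sort naturally; the resulting name pattern is cldnn_program_exec_p<program>_n<network>_<iteration>.graph. A self-contained sketch of the same padding logic (example values are illustrative):

    #include <iomanip>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Same scheme as the lambda above: width 2 by default, width 5 for the iteration counter.
    static std::string get_fixed_str(int value, int length = 2) {
        std::ostringstream ss;
        ss << std::setw(length) << std::setfill('0') << std::to_string(value);
        return ss.str();
    }

    int main() {
        std::cout << get_fixed_str(3) << '\n';     // prints "03"
        std::cout << get_fixed_str(12) << '\n';    // prints "12"
        std::cout << get_fixed_str(7, 5) << '\n';  // prints "00007"
        // Program id 1, network id 2, iteration 15 would produce:
        //   cldnn_program_exec_p01_n02_00015.graph
        return 0;
    }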

View File

@@ -1217,19 +1217,17 @@ void program::remove_nodes(std::vector<program_node*>& to_remove) {
 
 // TODO: break this function into number of smaller ones + add per-primitive fields (possibly use
 // primitive_inst::to_string?)
-void program::dump_program(const char* stage,
-                           bool with_full_info,
-                           std::function<bool(program_node const&)> const& filter) const {
+void program::dump_program(const char* stage, bool with_full_info) const {
     std::string path = get_dir_path(_config);
     if (path.empty() || !with_full_info) {
         return;
     }
 
     std::ofstream graph(path + "cldnn_program_" + std::to_string(prog_id) + "_" + stage + ".graph");
-    dump_graph_init(graph, *this, filter);
+    dump_graph_init(graph, *this);
 
     graph.open(path + "cldnn_program_" + std::to_string(prog_id) + "_" + stage + ".info");
-    dump_graph_info(graph, *this, filter);
+    dump_graph_info(graph, *this);
 
     graph.open(path + "cldnn_program_" + std::to_string(prog_id) + "_" + stage + ".order");
     dump_graph_processing_order(graph, *this);
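
With the filter parameter gone, a stage dump always covers the whole program. For each stage the function still writes three sibling files into the OV_GPU_DumpGraphs directory, e.g. for prog_id 3 and a hypothetical stage name "compiled": cldnn_program_3_compiled.graph (Graphviz DOT from dump_graph_init), cldnn_program_3_compiled.info (per-node details from dump_graph_info), and cldnn_program_3_compiled.order (the processing order).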

View File

@@ -170,10 +170,10 @@ std::string get_dir_path(const ExecutionConfig& config) {
 void dump_graph_init(std::ofstream& graph,
                      const program& program,
-                     std::function<bool(program_node const&)> const& filter) {
+                     std::function<std::shared_ptr<primitive_inst>(const primitive_id&)> get_primitive_inst) {
     const std::string invalid_layout_msg = "(invalid layout)";
-    const auto dump_mem_info = [&invalid_layout_msg](const program_node* ptr) {
+    const auto dump_mem_info = [&invalid_layout_msg, &get_primitive_inst](const program_node* ptr) {
         std::string out = "layout_info: ";
         if (!ptr->is_valid_output_layout()) {
             return out + invalid_layout_msg;
@@ -185,6 +185,9 @@ void dump_graph_init(std::ofstream& graph,
         } else {
             out += " " + out_layout.to_string();
         }
+        if (get_primitive_inst) {
+            out += "\nshape: " + get_primitive_inst(ptr->id())->get_output_layout().get_partial_shape().to_string();
+        }
         return out;
     };
@@ -199,7 +202,8 @@ void dump_graph_init(std::ofstream& graph,
         }
         auto output_fmts = ptr->get_preferred_output_fmts();
         if (!output_fmts.empty()) {
-            out += "\npreferred_out_fmt";
+            out += ((out.empty()) ? "" : "\n");
+            out += "preferred_out_fmt";
             for (auto& fmt : output_fmts) {
                 out += ":" + fmt_to_str(fmt);
             }
@@ -210,9 +214,6 @@ void dump_graph_init(std::ofstream& graph,
 
     graph << "digraph cldnn_program {\n";
     for (auto& node : program.get_processing_order()) {
-        if (filter && !filter(*node)) {
-            continue;
-        }
 #ifdef __clang__
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wpotentially-evaluated-expression"
@@ -259,9 +260,6 @@ void dump_graph_init(std::ofstream& graph,
         graph << "];\n";
 
         for (auto& user : node->get_users()) {
-            if (filter && !filter(*user)) {
-                continue;
-            }
             bool doubled = true;
             auto it = user->get_dependencies().begin();
             while (it != user->get_dependencies().end()) {
@@ -289,10 +287,6 @@ void dump_graph_init(std::ofstream& graph,
         }
 
         for (auto& dep : node->get_dependencies()) {
-            if (filter && !filter(*dep.first)) {
-                continue;
-            }
-
             if (std::find(dep.first->get_users().begin(), dep.first->get_users().end(), node) != dep.first->get_users().end()) {
                 continue;
             }
@@ -318,13 +312,8 @@ void dump_graph_optimized(std::ofstream& graph, const program& program) {
     close_stream(graph);
 }
 
-void dump_graph_info(std::ofstream& graph,
-                     const program& program,
-                     std::function<bool(program_node const&)> const& filter) {
+void dump_graph_info(std::ofstream& graph, const program& program) {
     for (auto& node : program.get_processing_order()) {
-        if (filter && !filter(*node))
-            continue;
-
         dump_full_node(graph, node);
         graph << std::endl << std::endl;
     }
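
Two practical notes on the output: dump_graph_init still emits standard Graphviz DOT (it opens with "digraph cldnn_program {"), so an execution-time dump can be rendered directly, e.g. with dot -Tsvg cldnn_program_exec_p01_n01_00000.graph -o graph.svg (illustrative file name). When the get_primitive_inst callback is supplied, each node label additionally carries a line such as shape: [1,3,224,224] (illustrative value) showing the output partial shape resolved for that iteration.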

View File

@@ -115,7 +115,8 @@ static void print_help_messages() {
     message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
                               " Please use OV_GPU_DumpProfilingDataPerIter=1 env variable to collect performance per iteration."
                               " Note: Performance impact may be significant as this option enforces host side sync after each primitive");
-    message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
+    message_list.emplace_back("OV_GPU_DumpGraphs", "1) Dump ngraph before and after transformation, 2) dump graph during model compilation,"
+                                                   " 3) dump graph during execution.");
     message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources");
     message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path");
     message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space."