From 730c3f8f2545738f817fa4feda1a844a6cc582be Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Wed, 29 Jun 2022 17:41:13 +0900 Subject: [PATCH] [GPU] Update Debug config for GPU plugin (#11983) + Added OV_GPU_DumpLayersResult + Applied minor update Signed-off-by: Min, Byungil --- .../intel_gpu/runtime/debug_configuration.hpp | 3 ++- .../intel_gpu/src/graph/layout_optimizer.cpp | 3 ++- src/plugins/intel_gpu/src/graph/network.cpp | 2 +- .../src/runtime/debug_configuration.cpp | 18 +++++++++++++----- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index d921c9646bf..0dab5038c38 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -36,13 +36,14 @@ public: std::vector dump_layers; // Dump intermediate buffers of specified layers only std::string dry_run_path; // Dry run and serialize execution graph into the specified path int dump_layers_dst_only; // Dump only output of layers + int dump_layers_result; // Dump result layers int dump_layers_limit_batch; // Limit the size of batch to dump int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation std::vector after_proc; // Start inference after the listed processes int serialize_compile; // Serialize creating primitives and compiling kernels std::string forced_impl_type; // Force implementation type either ocl or onednn static const debug_configuration *get_instance(); - bool is_dumped_layer(const std::string& layerName) const; + bool is_dumped_layer(const std::string& layerName, bool is_output = false) const; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index e8f7b1f0365..3b9132b5a1e 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1405,7 +1405,8 @@ impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) preferred_type = impl_types::cpu; if (node.id() == forced_impl_type.substr(0, found_type)) { - std::cout << " >>> " << forced_impl_type.substr(0, found_type) << " : " << forced_impl_type.substr(found_type + 1) << std::endl; + GPU_DEBUG_COUT << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : " + << forced_impl_type.substr(found_type + 1) << std::endl; return preferred_type; } } diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 23038ab507b..ba873f180e9 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -694,7 +694,7 @@ void network::execute_impl(const std::vector& events) { get_stream().finish(); auto& node = _program->get_node(inst->id()); const std::string layer_name = node.id(); - GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name)) { + GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name, node.is_output())) { log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0"); } } diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 96e13957a1f..43c4adb1369 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -110,6 +110,7 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources"); message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path"); message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space"); + message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only"); message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers"); message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump"); message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path"); @@ -117,8 +118,8 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space." " Supported on only on linux."); message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels"); - message_list.emplace_back("OV_GPU_ForceImplType", "Force implementation type, either ocl or onednn, of a target primitive. [primitive]:[impl_type]" - " Currently, only fc:onednn and fc:cldnn are supported."); + message_list.emplace_back("OV_GPU_ForceImplType", "Force implementation type of a target primitive or layer. [primitive or layout_name]:[impl_type]" + "For primitives, fc:onednn, fc:ocl, do:cpu, do:ocl, reduce:ocl and reduce:onednn are supported"); auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(), [](std::pair& a, std::pair& b){ @@ -143,6 +144,7 @@ debug_configuration::debug_configuration() , dump_sources(std::string()) , dump_layers_path(std::string()) , dump_layers_dst_only(0) + , dump_layers_result(0) , dry_run_path(std::string()) , disable_onednn(0) , dump_layers_limit_batch(std::numeric_limits::max()) @@ -157,8 +159,9 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("DumpGraphs", dump_graphs); get_gpu_debug_env_var("DumpSources", dump_sources); get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); + get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); + get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); get_gpu_debug_env_var("DisableOnednn", disable_onednn); get_gpu_debug_env_var("DryRunPath", dry_run_path); get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); @@ -211,9 +214,14 @@ const debug_configuration *debug_configuration::get_instance() { #endif } -bool debug_configuration::is_dumped_layer(const std::string& layerName) const { +bool debug_configuration::is_dumped_layer(const std::string& layerName, bool is_output) const { #ifdef GPU_DEBUG_CONFIG - if (dump_layers.empty()) return true; + if (is_output == true && dump_layers_result == 1 && + (layerName.find("constant:") == std::string::npos)) + return true; + if (dump_layers.empty() && dump_layers_result == 0) + return true; + auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ return (layerName.compare(dl) == 0); });