[GPU] Update Debug config for GPU plugin (#11983)

+ Added OV_GPU_DumpLayersResult
+ Applied minor update

Signed-off-by: Min, Byungil <byungil.min@intel.com>
2022-06-29 17:41:13 +09:00
parent 563d4f16e6
commit 730c3f8f25
4 changed files with 18 additions and 8 deletions
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp
@@ -36,13 +36,14 @@ public:
    std::vector<std::string> dump_layers;        // Dump intermediate buffers of specified layers only
    std::string dry_run_path;       // Dry run and serialize execution graph into the specified path
    int dump_layers_dst_only;       // Dump only output of layers
+    int dump_layers_result;         // Dump result layers
    int dump_layers_limit_batch;    // Limit the size of batch to dump
    int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
    std::vector<std::string> after_proc; // Start inference after the listed processes
    int serialize_compile;          // Serialize creating primitives and compiling kernels
    std::string forced_impl_type; // Force implementation type either ocl or onednn
    static const debug_configuration *get_instance();
-    bool is_dumped_layer(const std::string& layerName) const;
+    bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;
 };

 }  // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -1405,7 +1405,8 @@ impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node)
                preferred_type = impl_types::cpu;

            if (node.id() == forced_impl_type.substr(0, found_type)) {
-                std::cout << "  >>> " << forced_impl_type.substr(0, found_type) << " : " << forced_impl_type.substr(found_type + 1) << std::endl;
+                GPU_DEBUG_COUT << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : "
+                    << forced_impl_type.substr(found_type + 1) << std::endl;
                return preferred_type;
            }
        }
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -694,7 +694,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
            get_stream().finish();
            auto& node = _program->get_node(inst->id());
            const std::string layer_name = node.id();
-            GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name)) {
+            GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name, node.is_output())) {
                log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
            }
        }
--- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
+++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
@@ -110,6 +110,7 @@ static void print_help_messages() {
    message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources");
    message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path");
    message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space");
+    message_list.emplace_back("OV_GPU_DumpLayersResult", "Dump output buffers of result layers only");
    message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers");
    message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
    message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
@@ -117,8 +118,8 @@ static void print_help_messages() {
    message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs are finished, separated by space."
                              " Supported on only on linux.");
    message_list.emplace_back("OV_GPU_SerialCompile", "Serialize creating primitives and compiling kernels");
-    message_list.emplace_back("OV_GPU_ForceImplType", "Force implementation type, either ocl or onednn, of a target primitive. [primitive]:[impl_type]"
-                              " Currently, only fc:onednn and fc:cldnn are supported.");
+    message_list.emplace_back("OV_GPU_ForceImplType", "Force implementation type of a target primitive or layer. [primitive or layout_name]:[impl_type]"
+                              "For primitives, fc:onednn, fc:ocl, do:cpu, do:ocl, reduce:ocl and reduce:onednn are supported");

    auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(),
        [](std::pair<std::string, std::string>& a, std::pair<std::string, std::string>& b){
@@ -143,6 +144,7 @@ debug_configuration::debug_configuration()
        , dump_sources(std::string())
        , dump_layers_path(std::string())
        , dump_layers_dst_only(0)
+        , dump_layers_result(0)
        , dry_run_path(std::string())
        , disable_onednn(0)
        , dump_layers_limit_batch(std::numeric_limits<int>::max())
@@ -157,8 +159,9 @@ debug_configuration::debug_configuration()
    get_gpu_debug_env_var("DumpGraphs", dump_graphs);
    get_gpu_debug_env_var("DumpSources", dump_sources);
    get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
-    get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
    get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
+    get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
+    get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
    get_gpu_debug_env_var("DisableOnednn", disable_onednn);
    get_gpu_debug_env_var("DryRunPath", dry_run_path);
    get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
@@ -211,9 +214,14 @@ const debug_configuration *debug_configuration::get_instance() {
 #endif
 }

-bool debug_configuration::is_dumped_layer(const std::string& layerName) const {
+bool debug_configuration::is_dumped_layer(const std::string& layerName, bool is_output) const {
 #ifdef GPU_DEBUG_CONFIG
-    if (dump_layers.empty()) return true;
+    if (is_output == true && dump_layers_result == 1 &&
+        (layerName.find("constant:") == std::string::npos))
+        return true;
+    if (dump_layers.empty() && dump_layers_result == 0)
+        return true;
+
    auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){
        return (layerName.compare(dl) == 0);
    });