[GPU] New debug option to serialize execution of inferences (#9510)

Mingyu Kim 2022-01-14 10:24:40 +09:00 committed by GitHub
parent 508af22c66
commit 11cb8ebf55
3 changed files with 47 additions and 0 deletions
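
The new option, OV_GPU_AfterProc, makes the GPU plugin block network construction until the listed process IDs have exited, serializing inferences across processes. A minimal usage sketch, assuming a debug build with GPU_DEBUG_CONFIG enabled, the classic InferenceEngine 2.x API, and a hypothetical PID and model path:

#include <cstdlib>
#include <inference_engine.hpp>

int main() {
    // Hypothetical PID of another inference process that should finish first.
    setenv("OV_GPU_AfterProc", "12345", 1);

    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path
    // The GPU plugin builds its internal network during this call, so it
    // does not return until /proc/12345 disappears.
    auto exec = core.LoadNetwork(network, "GPU");
    return 0;
}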


@@ -38,6 +38,7 @@ public:
     int dump_layers_dst_only; // Dump only output of layers
     int dump_layers_limit_batch; // Limit the size of batch to dump
     int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
+    std::vector<std::string> after_proc; // Start inference only after the listed processes have finished
     static const debug_configuration *get_instance();
     bool is_dumped_layer(const std::string& layerName) const;
 };


@@ -41,6 +41,9 @@
 #ifdef GPU_DEBUG_CONFIG
 #include <iomanip>
 #include <fstream>
+#include <sys/stat.h>
+#include <chrono>
+#include <thread>
 #endif

 namespace cldnn {
@@ -203,12 +206,33 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
     else if (mem_dt == cldnn::data_types::u8)
         dump<uint8_t>(mem, stream, file_stream);
 }
+
+static void wait_for_the_turn() {
+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    bool need_to_wait;
+    do {
+        need_to_wait = false;
+        struct stat buffer;
+        for (auto pid : debug_config->after_proc) {
+            auto path = "/proc/" + pid;
+            std::cout << "check " + path << std::endl;
+            if (stat(path.c_str(), &buffer) == 0) {
+                need_to_wait = true;
+                std::cout << "Being nice.. Wait for process " << pid << std::endl;
+                std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+            }
+        }
+    } while (need_to_wait);
+}
 #else
 static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
     (void)mem;
     (void)stream;
     (void)layerName;
 }
+
+static void wait_for_the_turn() {
+}
 #endif

 /*
@@ -227,6 +251,11 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
         net_id = ++id_gen;
     }
+
+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    GPU_DEBUG_IF(debug_config->after_proc.size() != 0) {
+        wait_for_the_turn();
+    }
     allocate_primitives();
     configure_primitives_second_output();
     check_names();
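
The busy-wait above works because Linux creates a /proc/<pid> directory for every live process, so a successful stat() means the process is still running; note that PIDs can be reused by the OS, so this is a best-effort debug aid rather than a strict synchronization primitive. A self-contained sketch of the same polling technique, with a hypothetical PID:

#include <sys/stat.h>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// Block until none of the given PIDs has an entry under /proc (Linux only),
// polling once per second, like wait_for_the_turn() above.
static void wait_for_pids(const std::vector<std::string>& pids) {
    bool need_to_wait;
    do {
        need_to_wait = false;
        struct stat buffer;
        for (const auto& pid : pids) {
            if (stat(("/proc/" + pid).c_str(), &buffer) == 0) {  // entry exists -> process alive
                need_to_wait = true;
                std::this_thread::sleep_for(std::chrono::seconds(1));
            }
        }
    } while (need_to_wait);
}

int main() {
    wait_for_pids({"12345"});  // hypothetical PID
    std::cout << "all listed processes have exited" << std::endl;
}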


@@ -114,6 +114,8 @@ static void print_help_messages() {
     message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
     message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
     message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation");
+    message_list.emplace_back("OV_GPU_AfterProc", "Run inference after the specified process PIDs have finished, separated by spaces."
+                              " Supported only on Linux.");

     auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(),
         [](std::pair<std::string, std::string>& a, std::pair<std::string, std::string>& b){
@@ -157,6 +159,8 @@ debug_configuration::debug_configuration()
     get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
     std::string dump_layers_str;
     get_gpu_debug_env_var("DumpLayers", dump_layers_str);
+    std::string after_proc_str;
+    get_gpu_debug_env_var("AfterProc", after_proc_str);

     if (help > 0) {
         print_help_messages();
@@ -171,6 +175,19 @@ debug_configuration::debug_configuration()
             dump_layers.push_back(layer);
         }
     }
+
+    if (after_proc_str.length() > 0) {
+#ifdef _WIN32
+        GPU_DEBUG_COUT << "Warning: OV_GPU_AfterProc is supported only on Linux" << std::endl;
+#else
+        after_proc_str = " " + after_proc_str + " ";  // insert delimiters so each PID parses cleanly
+        std::stringstream ss(after_proc_str);
+        std::string pid;
+        while (ss >> pid) {
+            after_proc.push_back(pid);
+        }
+#endif
+    }
 #endif
 }
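
One detail of the parsing above: stringstream extraction with operator>> already skips any run of whitespace, so the explicit space padding is defensive rather than required. A tiny standalone check with a hypothetical input:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::string after_proc_str = " 112 113  114 ";  // hypothetical OV_GPU_AfterProc value
    std::stringstream ss(after_proc_str);
    std::vector<std::string> after_proc;
    std::string pid;
    while (ss >> pid)  // operator>> skips any amount of surrounding whitespace
        after_proc.push_back(pid);
    for (const auto& p : after_proc)
        std::cout << p << "\n";  // prints 112, 113, 114 on separate lines
}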