[GPU] Debug config improvement (#6575)

This commit is contained in:
Mingyu Kim 2021-07-09 18:23:16 +09:00 committed by GitHub
parent 98148539b3
commit 0d775269ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 42 deletions

View File

@ -24,8 +24,13 @@ private:
debug_configuration(); debug_configuration();
public: public:
static const char *prefix; static const char *prefix;
int verbose; int verbose; // Verbose execution
std::string dump_graphs; int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
int disable_usm; // Disable usm usage
std::string dump_graphs; // Dump optimized graph
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
int dump_layers_dst_only; // Dump only output of layers
static const debug_configuration *get_instance(); static const debug_configuration *get_instance();
}; };

View File

@ -35,10 +35,26 @@ static void get_str_env(const std::string &var, std::string &val) {
debug_configuration::debug_configuration() debug_configuration::debug_configuration()
: verbose(0) : verbose(0)
, dump_graphs(std::string()) { , print_multi_kernel_perf(0)
, disable_usm(0)
, dump_graphs(std::string())
, dump_layers_path(std::string())
, dump_layers(std::string())
, dump_layers_dst_only(0) {
#ifdef GPU_DEBUG_CONFIG #ifdef GPU_DEBUG_CONFIG
get_int_env("OV_GPU_Verbose", verbose); get_int_env("OV_GPU_Verbose", verbose);
get_int_env("OV_GPU_PrintMultiKernelPerf", print_multi_kernel_perf);
get_int_env("OV_GPU_DisableUsm", disable_usm);
get_str_env("OV_GPU_DumpGraphs", dump_graphs); get_str_env("OV_GPU_DumpGraphs", dump_graphs);
get_str_env("OV_GPU_DumpLayersPath", dump_layers_path);
get_str_env("OV_GPU_DumpLayers", dump_layers);
get_int_env("OV_GPU_DumpLayersDstOnly", dump_layers_dst_only);
if (dump_layers_path.length() > 0 && !disable_usm) {
disable_usm = 1;
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
}
if (dump_layers.length() > 0)
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
#endif #endif
} }

View File

@ -7,6 +7,7 @@
#include "cldnn/runtime/memory.hpp" #include "cldnn/runtime/memory.hpp"
#include "cldnn/runtime/stream.hpp" #include "cldnn/runtime/stream.hpp"
#include "cldnn/runtime/device_query.hpp" #include "cldnn/runtime/device_query.hpp"
#include "cldnn/runtime/debug_configuration.hpp"
#include "ocl/ocl_engine_factory.hpp" #include "ocl/ocl_engine_factory.hpp"
@ -32,6 +33,10 @@ const device::ptr engine::get_device() const {
} }
bool engine::use_unified_shared_memory() const { bool engine::use_unified_shared_memory() const {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->disable_usm) {
return false;
}
if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) { if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) {
return true; return true;
} }

View File

@ -3,6 +3,7 @@
// //
#include "ocl_event.hpp" #include "ocl_event.hpp"
#include "cldnn/runtime/debug_configuration.hpp"
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
@ -175,6 +176,17 @@ bool ocl_events::get_profiling_info_impl(std::list<instrumentation::profiling_in
for (auto& duration : all_durations[period.name]) { for (auto& duration : all_durations[period.name]) {
sum += (duration.second - duration.first); sum += (duration.second - duration.first);
} }
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->print_multi_kernel_perf) {
if (0 == strcmp(period.name, "executing")) {
GPU_DEBUG_COUT << "Multi-kernel time: ";
for (auto& duration : all_durations[period.name])
std::cout << " " << (duration.second - duration.first) / 1000;
std::cout << " Total " << sum / 1000 << std::endl;
}
}
info.push_back(get_profiling_interval(period.name, 0, sum)); info.push_back(get_profiling_interval(period.name, 0, sum));
} }

View File

@ -33,15 +33,9 @@
#include <utility> #include <utility>
#include <map> #include <map>
// #define DEBUG_DUMP_PATH "cldnn_dump/" #ifdef GPU_DEBUG_CONFIG
#ifdef DEBUG_DUMP_PATH
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#define DUMP_VERBOSE 0
#define DUMP_SINGLE_LAYER 0
#define DUMP_LAYER_NAME ""
#endif #endif
namespace cldnn { namespace cldnn {
@ -131,7 +125,7 @@ std::map<primitive_id, network_output> network::execute(const std::vector<event:
return result; return result;
} }
#ifdef DEBUG_DUMP_PATH #ifdef GPU_DEBUG_CONFIG
static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) { static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) {
#if defined HALF_HALF_HPP #if defined HALF_HALF_HPP
return val; return val;
@ -180,6 +174,19 @@ float convert_element(float f) { return f; }
float convert_element(half_t h) { return convert_half_to_float(h); } float convert_element(half_t h) { return convert_half_to_float(h); }
static size_t get_x_pitch(const layout& layout) {
try {
auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0));
auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0));
auto x0 = layout.get_linear_offset(tensor_x0);
auto x1 = layout.get_linear_offset(tensor_x1);
return (x1 - x0);
} catch (...) {
// When spatial size of x=0, x_pitch is meaningless
return 0;
}
}
template <class T> template <class T>
static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) { static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
auto&& size = mem->get_layout().size; auto&& size = mem->get_layout().size;
@ -189,6 +196,8 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
mem_lock<T> lock(mem, stream); mem_lock<T> lock(mem, stream);
auto mem_ptr = lock.data(); auto mem_ptr = lock.data();
auto x_pitch = get_x_pitch(mem->get_layout());
std::stringstream buffer;
for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) { for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) { for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
@ -196,10 +205,11 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) { for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) {
for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) { for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {
for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) {
for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) { cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(0, y, z, w));
cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, w)); size_t input_it = mem->get_layout().get_linear_offset(t);
size_t input_it = mem->get_layout().get_linear_offset(t);
file_stream << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl; for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x, input_it += x_pitch) {
buffer << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl;
} }
} }
} }
@ -207,6 +217,7 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
} }
} }
} }
file_stream << buffer.str();
} }
template <> template <>
void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) { void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
@ -238,12 +249,13 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
} }
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) { static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
GPU_DEBUG_GET_INSTANCE(debug_config);
std::string filename = layerName; std::string filename = layerName;
std::replace(filename.begin(), filename.end(), '\\', '_'); std::replace(filename.begin(), filename.end(), '\\', '_');
std::replace(filename.begin(), filename.end(), '/', '_'); std::replace(filename.begin(), filename.end(), '/', '_');
std::replace(filename.begin(), filename.end(), ' ', '_'); std::replace(filename.begin(), filename.end(), ' ', '_');
std::replace(filename.begin(), filename.end(), ':', '_'); std::replace(filename.begin(), filename.end(), ':', '_');
filename = DEBUG_DUMP_PATH + filename + ".txt"; filename = debug_config->dump_layers_path + filename + ".txt";
std::ofstream file_stream(filename); std::ofstream file_stream(filename);
auto mem_dt = mem->get_layout().data_type; auto mem_dt = mem->get_layout().data_type;
@ -260,6 +272,12 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string laye
else if (mem_dt == cldnn::data_types::u8) else if (mem_dt == cldnn::data_types::u8)
dump<uint8_t>(mem, stream, file_stream); dump<uint8_t>(mem, stream, file_stream);
} }
#else
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
(void)mem;
(void)stream;
(void)layerName;
}
#endif #endif
/* /*
Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
@ -487,25 +505,24 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
set_arguments(); set_arguments();
for (auto& inst : _exec_order) { for (auto& inst : _exec_order) {
#ifdef DEBUG_DUMP_PATH GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
auto& node = _program->get_node(inst->id()); auto& node = _program->get_node(inst->id());
std::string layer_name = node.id();
std::string layer_name = node.id(); GPU_DEBUG_IF(debug_config->verbose >= 2) {
#if DUMP_VERBOSE std::cerr << get_primitive_info(inst->id()) << std::endl;
std::cerr << get_primitive_info(inst->id()) << std::endl; }
#endif
#if DUMP_SINGLE_LAYER GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
if (layer_name == DUMP_LAYER_NAME) { (debug_config->dump_layers.length() == 0 ||
#endif (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos))) {
std::cerr << "Dump " << layer_name << " layer" << std::endl; std::cout << "Dump " << layer_name << " layer src" << std::endl;
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) { for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(), log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
layer_name + "_src_" + std::to_string(i)); layer_name + "_src_" + std::to_string(i));
}
} }
#if DUMP_SINGLE_LAYER
} }
#endif
#endif
GPU_DEBUG_IF(debug_config->verbose >= 1) { GPU_DEBUG_IF(debug_config->verbose >= 1) {
GPU_DEBUG_COUT << "Execute " << inst->id() << std::endl; GPU_DEBUG_COUT << "Execute " << inst->id() << std::endl;
} }
@ -517,16 +534,16 @@ void network_impl::execute(const std::vector<event::ptr>& events) {
} }
execute_primitive(inst, events); execute_primitive(inst, events);
#ifdef DEBUG_DUMP_PATH GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
get_stream().finish(); get_stream().finish();
#if DUMP_SINGLE_LAYER auto& node = _program->get_node(inst->id());
if (layer_name == DUMP_LAYER_NAME) std::string layer_name = node.id();
#endif GPU_DEBUG_IF(debug_config->dump_layers.length() == 0 ||
{ (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos)) {
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0"); std::cout << "Dump " << layer_name << " layer dst" << std::endl;
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
}
} }
#endif
} }
for (auto& inst : _program->get_processing_order()) { for (auto& inst : _program->get_processing_order()) {

View File

@ -0,0 +1,20 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "test_utils/test_utils.h"
#include "cldnn/runtime/debug_configuration.hpp"
using namespace cldnn;
using namespace ::tests;
TEST(debug_config_test, check_debug_config_off_on_release) {
#ifdef NDEBUG
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(1) {
GTEST_FAIL(); /* This should be disabled in case of release build */
}
#endif
}