[GPU] Add debug config for disabled async compilation (#18535)

Andrew Kwangwoong Park 2023-07-14 15:42:03 +09:00 committed by GitHub
parent cba84fd763
commit 38913f2184
4 changed files with 39 additions and 20 deletions


@@ -117,6 +117,7 @@ public:
     int serialize_compile;                      // Serialize creating primitives and compiling kernels
     std::vector<std::string> forced_impl_types; // Force implementation type either ocl or onednn
     int max_kernels_per_batch;                  // Maximum number of kernels in a batch during compiling kernels
+    int disable_async_compilation;              // Disable async compilation
     std::set<int64_t> dump_iteration;           // Dump n-th execution of network.
     static const debug_configuration *get_instance();
     bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;


@@ -100,7 +100,13 @@ struct primitive_type_base : primitive_type {
     cldnn::layout calc_output_layout(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override {
         OPENVINO_ASSERT(node.type() == this, "[GPU] primitive_type_base::calc_output_layout: primitive type mismatch");
-        return typed_primitive_inst<PType>::calc_output_layout(node, impl_param);
+        for (auto& t : impl_param.input_layouts) {
+            GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " input tensor: " << t.to_short_string() << std::endl;
+        }
+        auto res = typed_primitive_inst<PType>::calc_output_layout(node, impl_param);
+        GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " output tensor: " << res.to_short_string() << std::endl;
+        return res;
     }

     std::vector<cldnn::layout> calc_output_layouts(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override {
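The GPU_DEBUG_TRACE_DETAIL statements added above are the plugin's verbosity-gated trace mechanism: they produce output only when detailed debug logging is enabled. As a rough, self-contained sketch of that kind of gated trace stream (not the plugin's actual macro; debug_config_sketch, MY_VERBOSE, and TRACE_DETAIL are illustrative names):

// Minimal sketch of a verbosity-gated trace stream; all names here are
// hypothetical and do not reproduce the OpenVINO GPU plugin's implementation.
#include <cstdlib>
#include <iostream>

struct debug_config_sketch {
    int verbose = 0;
    debug_config_sketch() {
        if (const char* v = std::getenv("MY_VERBOSE"))  // hypothetical env var
            verbose = std::atoi(v);
    }
    static const debug_config_sketch* get_instance() {
        static debug_config_sketch cfg;  // environment is read once
        return &cfg;
    }
};

// Stream operands are evaluated only when the verbosity threshold is met,
// so the trace costs almost nothing when logging is off.
#define TRACE_DETAIL \
    if (debug_config_sketch::get_instance()->verbose < 4) { } else std::cout

int main() {
    TRACE_DETAIL << "input tensor: " << 42 << std::endl;  // prints only with MY_VERBOSE >= 4
    return 0;
}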


@@ -541,26 +541,35 @@ bool primitive_inst::update_impl() {
         }
         if (!cached_impl) {
             if (_dynamic_impl) {
-                auto& compilation_context = get_network().get_program()->get_compilation_context();
-                compilation_context.push_task(updated_params_no_dyn_pad.hash(), [this, &compilation_context, updated_params_no_dyn_pad]() {
-                    if (compilation_context.is_stopped())
-                        return;
-                    auto _program = get_network().get_program();
-                    auto& cache = _program->get_implementations_cache();
-                    {
-                        // Check existense in the cache one more time as several iterations of model execution could happens and multiple compilation
-                        // tasks created for same shapes
-                        if (cache.has(updated_params_no_dyn_pad))
-                            return;
-                    }
-                    auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad);
-                    if (!can_be_optimized()) {
-                        auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source());
-                        impl->set_kernels(kernels);
-                        cache.add(updated_params_no_dyn_pad, impl->clone());
-                    }
-                });
+                auto use_async_compilation = [&]() {
+                    GPU_DEBUG_GET_INSTANCE(debug_config);
+                    GPU_DEBUG_IF(debug_config->disable_async_compilation) {
+                        return false;
+                    }
+                    return true;
+                };
+                if (use_async_compilation()) {
+                    auto& compilation_context = get_network().get_program()->get_compilation_context();
+                    compilation_context.push_task(updated_params_no_dyn_pad.hash(), [this, &compilation_context, updated_params_no_dyn_pad]() {
+                        if (compilation_context.is_stopped())
+                            return;
+                        auto _program = get_network().get_program();
+                        auto& cache = _program->get_implementations_cache();
+                        {
+                            // Check existense in the cache one more time as several iterations of model execution could happens and multiple compilation
+                            // tasks created for same shapes
+                            if (cache.has(updated_params_no_dyn_pad))
+                                return;
+                        }
+                        auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad);
+                        if (!can_be_optimized()) {
+                            auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source());
+                            impl->set_kernels(kernels);
+                            cache.add(updated_params_no_dyn_pad, impl->clone());
+                        }
+                    });
+                }
                 if (!can_be_optimized()) {
                     _impl = _dynamic_impl->clone();
                     auto new_impl_params = _impl->canonicalize_shapes(*_impl_params);
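With this change, update_impl() asks a small use_async_compilation() helper whether to queue the kernel compilation task on the compilation context; when the new disable_async_compilation debug flag is set, the task is simply not queued and execution continues with the shape-agnostic dynamic implementation, presumably to make runs more deterministic while debugging. A standalone sketch of the same gating idea, using a hypothetical MY_DISABLE_ASYNC variable and std::async in place of the plugin's compilation context:

// Sketch of gating background work on a debug flag; MY_DISABLE_ASYNC and
// compile_kernels() are illustrative names, not OpenVINO APIs.
#include <cstdlib>
#include <future>
#include <iostream>

static bool async_enabled() {
    // Any non-zero value of the (hypothetical) variable means "disable async".
    const char* v = std::getenv("MY_DISABLE_ASYNC");
    return !(v && std::atoi(v) != 0);
}

static int compile_kernels() {
    // Stand-in for an expensive kernel compilation step.
    return 42;
}

int main() {
    std::future<int> pending;
    if (async_enabled()) {
        // Normal path: kick compilation off in the background and keep going.
        pending = std::async(std::launch::async, compile_kernels);
        std::cout << "compiling in the background\n";
    } else {
        // Debug path: in this sketch the work is done inline, so behaviour is
        // deterministic and easy to step through.
        std::cout << "async disabled, compiled inline: " << compile_kernels() << "\n";
    }
    if (pending.valid())
        std::cout << "async result: " << pending.get() << "\n";
    return 0;
}

Note that the plugin itself does not compile inline on the disabled path; as the hunk above shows, it only skips pushing the task and keeps using the dynamic implementation.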


@@ -131,6 +131,7 @@ static void print_help_messages() {
                                   " For example fc:onednn gemm:onednn reduce:ocl do:cpu"
                                   " For primitives fc, gemm, do, reduce, concat are supported. Separated by space.");
     message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels");
+    message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation");
     message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space.");
     message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in"
                               "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), "
@@ -171,7 +172,8 @@ debug_configuration::debug_configuration()
     , dump_layers_raw(0)
     , base_batch_for_memory_estimation(-1)
     , serialize_compile(0)
-    , max_kernels_per_batch(0) {
+    , max_kernels_per_batch(0)
+    , disable_async_compilation(0) {
 #ifdef GPU_DEBUG_CONFIG
     get_gpu_debug_env_var("Help", help);
     get_common_debug_env_var("Verbose", verbose);
@@ -199,6 +201,7 @@ debug_configuration::debug_configuration()
     std::string forced_impl_types_str;
     get_gpu_debug_env_var("ForceImplTypes", forced_impl_types_str);
     get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch);
+    get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation);
     std::string dump_iteration_str;
     get_gpu_debug_env_var("DumpIteration", dump_iteration_str);
     std::string mem_preallocation_params_str;
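Like the other options in this constructor, the new knob only takes effect in builds where GPU_DEBUG_CONFIG is defined, and it is driven by the OV_GPU_DisableAsyncCompilation environment variable announced in the help text above. Exporting the variable in the shell before launching the application is the usual way to use it; the snippet below is a hedged sketch of setting it programmatically from a standard OpenVINO C++ application (POSIX setenv; the model path is a placeholder):

// Sketch: set the debug variable before the GPU plugin reads its configuration.
// Assumes a debug-enabled (GPU_DEBUG_CONFIG) build of the plugin; the OpenVINO
// calls below are the standard ov::Core API.
#include <cstdlib>
#include <openvino/openvino.hpp>

int main() {
    // Must happen before the plugin is first used: the debug configuration is a
    // singleton populated from the environment in its constructor.
    setenv("OV_GPU_DisableAsyncCompilation", "1", /*overwrite=*/1);  // POSIX only

    ov::Core core;
    auto model = core.read_model("model.xml");        // placeholder path
    auto compiled = core.compile_model(model, "GPU");
    auto request = compiled.create_infer_request();
    request.infer();  // with the flag set, async compilation tasks are not queued
    return 0;
}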