diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 337cb04c420..91806b6ff89 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -7,22 +7,63 @@ #include #include +namespace ov { +namespace intel_gpu { + +// Verbose log levels: +// DISABLED - silent mode (Default) +// INFO - Minimal verbose: +// * May log basic info about device, plugin configuration, model and execution statistics +// * Mustn't log any info that depends on the number of iterations or the number of layers in the model +// * Minimal impact on both load time and inference time +// LOG - Enables graph optimization verbose: +// * Includes info from log level INFO +// * May log info about applied graph transformations, memory allocations and other model compilation time steps +// * May impact compile_model() execution time +// * Minimal impact on inference time +// TRACE - Enables basic execution time verbose +// * Includes info from log level LOG +// * May log info during model execution +// * May log short info about primitive execution +// * May impact network execution time +// TRACE_DETAIL - Max verbosity +// * Includes info from log level TRACE +// * May log any stage and print detailed info about each execution step +enum class LogLevel : int8_t { + DISABLED = 0, + INFO = 1, + LOG = 2, + TRACE = 3, + TRACE_DETAIL = 4 +}; + +} // namespace intel_gpu +} // namespace ov + #ifdef GPU_DEBUG_CONFIG #define GPU_DEBUG_IF(cond) if (cond) #define GPU_DEBUG_PROFILED_STAGE(stage) \ auto stage_prof = cldnn::instrumentation::profiled_stage(\ !cldnn::debug_configuration::get_instance()->dump_profiling_data.empty(), *this, stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) stage_prof.set_cache_hit(val) + +#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \ + std::cout << cldnn::debug_configuration::prefix +#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast<std::underlying_type<ov::intel_gpu::LogLevel>::type>(min_verbose_level)) #else #define GPU_DEBUG_IF(cond) if (0) #define GPU_DEBUG_PROFILED_STAGE(stage) #define GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(val) +#define GPU_DEBUG_LOG_RAW(min_verbose_level) if (0) std::cout << cldnn::debug_configuration::prefix #endif -#define GPU_DEBUG_COUT std::cout << cldnn::debug_configuration::prefix // Macro below is inserted to avoid unused variable warning when GPU_DEBUG_CONFIG is OFF #define GPU_DEBUG_GET_INSTANCE(name) auto name = cldnn::debug_configuration::get_instance(); (void)(name); +#define GPU_DEBUG_INFO GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::INFO) +#define GPU_DEBUG_LOG GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::LOG) +#define GPU_DEBUG_TRACE GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE) +#define GPU_DEBUG_TRACE_DETAIL GPU_DEBUG_LOG_RAW(ov::intel_gpu::LogLevel::TRACE_DETAIL) namespace cldnn { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 97cc6b9d772..c0209b1a3ce 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -22,10 +22,8 @@ using namespace cldnn; -#define LOG_NODE_REMOVAL(id) GPU_DEBUG_IF(debug_config->verbose >= 2) { \ - GPU_DEBUG_COUT << 
"[remove_redundant_reorders:" << __LINE__ << "] " << "Remove node: " << (id) << std::endl; } -#define LOG_NODE_REPLACEMENT(id) GPU_DEBUG_IF(debug_config->verbose >= 2) { \ - GPU_DEBUG_COUT << "[remove_redundant_reorders:" << __LINE__ << "] " << "Replace node: " << (id) << std::endl; } +#define LOG_NODE_REMOVAL(id) GPU_DEBUG_LOG_PASS << "Remove node: " << (id) << std::endl; +#define LOG_NODE_REPLACEMENT(id) GPU_DEBUG_LOG_PASS << "Replace node: " << (id) << std::endl; remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations, @@ -34,7 +32,6 @@ remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, b remove_output_reorders(remove_output_reorders) {} void remove_redundant_reorders::run(program& p) { - GPU_DEBUG_GET_INSTANCE(debug_config); auto update_implementation = [&](program_node& node) { if (!update_implementations) return; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 794abdfd072..63673c844dd 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -34,8 +34,6 @@ void reorder_inputs::run(program& p) { run(p, _lo, _rf); } namespace { std::map get_preferred_formats(program& p, layout_optimizer& lo) { - GPU_DEBUG_GET_INSTANCE(debug_config); - std::map fmt_map; #ifdef ENABLE_ONEDNN_FOR_GPU @@ -59,9 +57,7 @@ std::map get_preferred_formats(program& p, layout_o if (onednn_impls_counter < 1 && lo.get_optimization_attributes().use_onednn_impls) { should_update_fmt_map = true; lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 0); - GPU_DEBUG_IF(debug_config->verbose >= 1) { - GPU_DEBUG_COUT << "The return to clDNN implementations" << std::endl; - } + GPU_DEBUG_LOG << "Disable oneDNN implementations globally" << std::endl; } if (should_update_fmt_map) @@ -277,10 +273,7 @@ void propagate_formats_rec(std::map& fmt_map, fmt = travel_direction_wrapper::first(first_fmt, second_fmt); fmt_map.at(node) = fmt; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] propagate_formats_rec: " << node->id() << " - " << fmt_to_str(fmt) << std::endl; - } + GPU_DEBUG_LOG << "Propagate_formats_rec: " << node->id() << " - " << fmt_to_str(fmt) << std::endl; for (auto next : travel_direction_wrapper::next_nodes(node)) { if (!next->is_in_data_flow()) @@ -312,10 +305,7 @@ void propagate_formats_rec(std::map::first(first_fmt, second_fmt); fmt_map.at(node) = fmt; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] propagate_formats_rec: " << node->id() << " - " << fmt_to_str(fmt) << std::endl; - } + GPU_DEBUG_LOG << "Propagate_formats_rec: " << node->id() << " - " << fmt_to_str(fmt) << std::endl; for (auto next : travel_direction_wrapper::next_nodes(node)) { if (!next.first->is_in_data_flow()) @@ -541,7 +531,6 @@ const char *dir_msg(direction_e dir) { // If there is layout mismatch between two layers, add reorder template void insert_reorders_in_dir(program& p, const std::map& fmt_map, reorder_factory& rf, layout_optimizer& lo, program_node* node) { - GPU_DEBUG_GET_INSTANCE(debug_config); auto fmt = fmt_map.at(node); auto next_cpy = travel_direction_wrapper::next_nodes(node); @@ -565,10 +554,8 @@ void 
insert_reorders_in_dir(program& p, const std::mapverbose >= 2) { - GPU_DEBUG_COUT << __func__ << ":" << __LINE__ << ":" << dir_msg(dir) << " " << node->id() << " --> " << next->id() << " ## " - << fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; - } + GPU_DEBUG_LOG << __func__ << ":" << __LINE__ << ":" << dir_msg(dir) << " " << node->id() << " --> " << next->id() << " ## " + << fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; if (in_layout.format == format::any || out_layout.format == format::any) continue; @@ -580,10 +567,8 @@ void insert_reorders_in_dir(program& p, const std::mapverbose >= 2) { - GPU_DEBUG_COUT << __func__ << ":" << __LINE__ << ":" << dir_msg(dir) << " " << reorder_node.id() - << " Reorder is added" << std::endl; - } + GPU_DEBUG_LOG << __func__ << ":" << __LINE__ << ":" << dir_msg(dir) << " " << reorder_node.id() + << " Reorder is added" << std::endl; p.add_intermediate(reorder_node, *travel_direction_wrapper::second(node, next), *travel_direction_wrapper::first(node, next), @@ -595,7 +580,6 @@ void insert_reorders_in_dir(program& p, const std::map void insert_reorders_in_dir(program& p, const std::map& fmt_map, reorder_factory& rf, layout_optimizer& lo, program_node* node) { - GPU_DEBUG_GET_INSTANCE(debug_config); auto fmt = fmt_map.at(node); auto next_cpy = travel_direction_wrapper::next_nodes(node); @@ -619,11 +603,9 @@ void insert_reorders_in_dir(program& p, const std::map

verbose >= 2) { - GPU_DEBUG_COUT << __func__ << ":" << __LINE__ << ":" << dir_msg(direction_e::backwards) - << " " << node->id() << " --> " << next.first->id() << " ## " - << fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; - } + GPU_DEBUG_LOG << __func__ << ":" << __LINE__ << ":" << dir_msg(direction_e::backwards) + << " " << node->id() << " --> " << next.first->id() << " ## " + << fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; if (in_layout.format == format::any || out_layout.format == format::any) continue; @@ -635,10 +617,8 @@ void insert_reorders_in_dir(program& p, const std::map

verbose >= 2) { - GPU_DEBUG_COUT << __func__ << ":" << __LINE__ << ":" << dir_msg(direction_e::backwards) << " " << reorder_node.id() - << " Reorder is added" << std::endl; - } + GPU_DEBUG_LOG << __func__ << ":" << __LINE__ << ":" << dir_msg(direction_e::backwards) << " " << reorder_node.id() + << " Reorder is added" << std::endl; p.add_intermediate(reorder_node, *travel_direction_wrapper::second(node, next.first), *travel_direction_wrapper::first(node, next.first), @@ -684,30 +664,26 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) auto fmt_map = get_preferred_formats(p, lo); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] Preferred formats:" << std::endl; - for (auto& node_fmt : fmt_map) { - if (node_fmt.second != format::any) { - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] " << node_fmt.first->id() << " " << fmt_to_str(node_fmt.second) << std::endl; - } + GPU_DEBUG_LOG_PASS << "Preferred formats:" << std::endl; + for (auto& node_fmt : fmt_map) { + if (node_fmt.second != format::any) { + GPU_DEBUG_LOG_PASS << " " << node_fmt.first->id() << " " << fmt_to_str(node_fmt.second) << std::endl; } } propagate_formats(p, fmt_map, lo); minimize_local_reorders(p, fmt_map, lo); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] Selected formats:" << std::endl; - for (auto node_ptr : p.get_processing_order()) { - if (fmt_map.count(node_ptr) == 0) - continue; + GPU_DEBUG_LOG_PASS << "Selected formats:" << std::endl; + for (auto node_ptr : p.get_processing_order()) { + if (fmt_map.count(node_ptr) == 0) + continue; - auto fmt = fmt_map.at(node_ptr); - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; - } + auto fmt = fmt_map.at(node_ptr); + GPU_DEBUG_LOG_PASS << " " << node_ptr->id() << " " << fmt_to_str(fmt) << std::endl; } - GPU_DEBUG_IF(debug_config->verbose >= 1) { + GPU_DEBUG_IF(debug_config->verbose >= 2) { reorder_cnt total_reorder_count = std::accumulate( p.get_processing_order().begin(), p.get_processing_order().end(), @@ -719,8 +695,8 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) return reorder_cnt{ total.number + count.number, total.total_sizes + count.total_sizes }; }); // Divide results by two as above function will each reorder from both sides - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] Total number of reorders: " << total_reorder_count.number / 2 << std::endl; - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] Total elements count of all reorders: " << total_reorder_count.total_sizes / 2 << std::endl; + GPU_DEBUG_LOG_PASS << "Total number of reorders: " << total_reorder_count.number / 2 << std::endl; + GPU_DEBUG_LOG_PASS << "Total elements count of all reorders: " << total_reorder_count.total_sizes / 2 << std::endl; // Count number of reorders that will be fused size_t nodes_with_fusing = 0; @@ -736,8 +712,8 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) } } } - GPU_DEBUG_COUT << "[clDNN][reorder_inputs] Number of nodes with fused reorders: " << nodes_with_fusing << std::endl; - GPU_DEBUG_COUT << "----------------------------------------------" << std::endl; + GPU_DEBUG_LOG_PASS << "Number of nodes with fused reorders: " << nodes_with_fusing << std::endl; + GPU_DEBUG_LOG_PASS << "----------------------------------------------" << std::endl; } insert_reorders(p, fmt_map, rf, lo); @@ -802,7 +778,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, 
reorder_factory& rf) } }; - const auto reorder_convolution = [&p, &lo, &rf, &debug_config](typed_program_node& conv_node) { + const auto reorder_convolution = [&p, &lo, &rf](typed_program_node& conv_node) { { // reorder weights convolution auto& weights = conv_node.weights(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index 61695cc4e88..c9932a91e3f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ -32,7 +32,6 @@ void select_preferred_formats::run(program& p) { return; #ifdef ENABLE_ONEDNN_FOR_GPU - GPU_DEBUG_GET_INSTANCE(debug_config); for (auto n : p.get_processing_order()) { // Onednn primitive descriptor creation may fail, for example, due to asymmetric weight. try { @@ -47,9 +46,7 @@ void select_preferred_formats::run(program& p) { _lo.select_preferred_formats_for_onednn(*n, prim_desc); } catch(std::exception &exception) { - GPU_DEBUG_IF(debug_config->verbose >= 1) { - std::cout << "WARNING(select_preferred_formats): " << exception.what() << std::endl; - } + GPU_DEBUG_INFO << "WARNING(select_preferred_formats): " << exception.what() << std::endl; } } #endif // ENABLE_ONEDNN_FOR_GPU diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index abba0259aeb..ed5b70b9830 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -68,18 +68,15 @@ protected: dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - auto dnnl_mem = a_zp->get_onednn_memory(desc); - void *mapped_ptr = dnnl_mem.map_data(); - if (mapped_ptr) { - GPU_DEBUG_COUT << instance.id() << " activations_zero_points: "; - for (size_t i = 0; i < desc.get_size(); ++i) { - std::cout << static_cast(mapped_ptr)[i] << " "; - } - std::cout << std::endl; - dnnl_mem.unmap_data(mapped_ptr); + auto dnnl_mem = a_zp->get_onednn_memory(desc); + void *mapped_ptr = dnnl_mem.map_data(); + if (mapped_ptr) { + GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: "; + for (size_t i = 0; i < desc.get_size(); ++i) { + GPU_DEBUG_TRACE_DETAIL << static_cast(mapped_ptr)[i] << " "; } + GPU_DEBUG_TRACE_DETAIL << std::endl; + dnnl_mem.unmap_data(mapped_ptr); } } @@ -88,18 +85,15 @@ protected: dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, true); args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)}); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - auto dnnl_mem = w_zp->get_onednn_memory(desc); - void *mapped_ptr = dnnl_mem.map_data(); - if (mapped_ptr) { - GPU_DEBUG_COUT << instance.id() << " weights_zero_points: "; - for (size_t i = 0; i < desc.get_size(); ++i) { - std::cout << static_cast(mapped_ptr)[i] << " "; - } - std::cout << std::endl; - dnnl_mem.unmap_data(mapped_ptr); + auto dnnl_mem = w_zp->get_onednn_memory(desc); + void *mapped_ptr = dnnl_mem.map_data(); + if (mapped_ptr) { + GPU_DEBUG_TRACE_DETAIL << instance.id() << " 
weights_zero_points: "; + for (size_t i = 0; i < desc.get_size(); ++i) { + GPU_DEBUG_TRACE_DETAIL << static_cast(mapped_ptr)[i] << " "; } + GPU_DEBUG_TRACE_DETAIL << std::endl; + dnnl_mem.unmap_data(mapped_ptr); } } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index c305e98595f..fec91bdb680 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -23,6 +23,8 @@ #include +#define GPU_DEBUG_LOG_PASS GPU_DEBUG_LOG << "[" << get_name() << "] " + namespace cldnn { class base_pass { friend class pass_manager; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index b031e7ade69..436db5228bc 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1350,8 +1350,8 @@ impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) preferred_type = impl_types::cpu; if (node.id() == forced_impl_type.substr(0, found_type)) { - GPU_DEBUG_COUT << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : " - << forced_impl_type.substr(found_type + 1) << std::endl; + GPU_DEBUG_LOG << " Forced implementation type : " << forced_impl_type.substr(0, found_type) << " : " + << forced_impl_type.substr(found_type + 1) << std::endl; return preferred_type; } } @@ -1819,7 +1819,6 @@ format layout_optimizer::get_preferred_format(program_node& node) { #ifdef ENABLE_ONEDNN_FOR_GPU void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, dnnl::primitive_desc prim_desc) { - GPU_DEBUG_GET_INSTANCE(debug_config); if (node.is_input() || !are_data_types_suitable_for_onednn(node)) { return; } @@ -1868,10 +1867,8 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d node.set_preferred_output_fmt(usr, dst_fmt); } - GPU_DEBUG_IF(debug_config->verbose >= 2) { - std::cout << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(src_fmt) << " --> " << fmt_to_str(dst_fmt) - << " For index : " << idx << std::endl; - } + GPU_DEBUG_LOG << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(src_fmt) << " --> " << fmt_to_str(dst_fmt) + << " For index : " << idx << std::endl; } } diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index be43981c33e..e7036268e52 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -890,9 +890,8 @@ void network::execute_impl(const std::vector& events) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "NetworkImpl::Execute"); // Wait for previous execution completion reset_execution(false); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 1) - GPU_DEBUG_COUT << "----------------------------------------------" << std::endl; + GPU_DEBUG_TRACE << "----------------------------------------------" << std::endl; + GPU_DEBUG_TRACE << "Start network execution" << std::endl; std::vector in_out_mem; auto is_surface_lock_check_needed = [&](const shared_mem_type& shared_mem_type) { @@ -928,6 +927,7 @@ void network::execute_impl(const std::vector& events) { auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream()); set_arguments(); + GPU_DEBUG_GET_INSTANCE(debug_config); for (auto& inst : _exec_order) { GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { 
const std::string layer_name = inst->id(); @@ -1120,10 +1120,7 @@ void network::allocate_primitive_instance(program_node const& node) { if (_primitives.count(node.id())) return; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << node.id() << ": allocate primitive instance" << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << node.id() << ": allocate primitive instance" << std::endl; auto inst = node.type()->create_instance(*this, node); @@ -1191,10 +1188,7 @@ void network::transfer_memory_to_device(std::shared_ptr instance // Allocate and transfer memory auto device_mem = inst_mem.get_engine()->allocate_memory(inst_mem.get_layout(), allocation_type::usm_device, false); device_mem->copy_from(get_stream(), inst_mem); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << node.id() << ": constant]" << std::endl; - } + GPU_DEBUG_LOG << "[" << node.id() << ": constant]" << std::endl; _memory_pool->release_memory(&inst_mem, node.id(), get_id()); instance->set_output_memory(device_mem); } diff --git a/src/plugins/intel_gpu/src/graph/pass_manager.cpp b/src/plugins/intel_gpu/src/graph/pass_manager.cpp index f217d3b6224..53bec3925a5 100644 --- a/src/plugins/intel_gpu/src/graph/pass_manager.cpp +++ b/src/plugins/intel_gpu/src/graph/pass_manager.cpp @@ -48,11 +48,13 @@ void pass_manager::run(program& p, base_pass& pass) { using ms = std::chrono::duration>; using Time = std::chrono::high_resolution_clock; + GPU_DEBUG_LOG << "Run pass " << pass.get_name() << std::endl; auto start = Time::now(); pass.run(p); auto stop = Time::now(); std::chrono::duration fs = stop - start; ms opt_pass_time = std::chrono::duration_cast(fs); + GPU_DEBUG_LOG << "Pass " << pass.get_name() << " execution time: " << opt_pass_time.count() << " ms" << std::endl; p.save_pass_info(pass.get_name()); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index e814dafa91d..b1868de9169 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -142,7 +142,6 @@ void primitive_inst::set_output_memory(memory::ptr mem_new, bool check, size_t i } void primitive_inst::update_shape() { - GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::shape_inference); bool input_shape_changed = false; @@ -199,9 +198,7 @@ void primitive_inst::update_shape() { // Events may be not created for in-order queue, so take them for OOO queue only if (_network.has_event(dep.id()) && queue_type == queue_types::out_of_order) { dependencies_events.push_back(_network.get_primitive_event(dep_id)); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": shape infer waits for " << i << " dependency\n"; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": shape infer waits for " << i << " dependency\n"; } auto dep_mem = _network.get_output_memory(dep_id); memory_deps.insert({i, dep_mem}); @@ -221,10 +218,8 @@ void primitive_inst::update_shape() { auto update_output_layout = [&](layout& layout, size_t idx) { layout.data_padding = padding::max(_node->get_primitive()->output_paddings[idx], layout.data_padding); if (_impl_params->get_output_layout(idx) != layout) { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": update shape: was: " << _impl_params->get_output_layout(idx).to_short_string() - << " now: " << layout.to_short_string() << std::endl; - } + 
GPU_DEBUG_TRACE_DETAIL << id() << ": update shape: was: " << _impl_params->get_output_layout(idx).to_short_string() + << " now: " << layout.to_short_string() << std::endl; set_shape_change(); } _impl_params->output_layouts[idx] = layout; @@ -265,16 +260,12 @@ void primitive_inst::realloc_if_needed() { bool can_reuse_buffer = _outputs[0] && actual_layout.count() <= max_output_layout_size; if (can_reuse_buffer) { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": reuse previously allocated output buffer" << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": reuse previously allocated output buffer" << std::endl; _outputs[0] = _network.get_engine().reinterpret_buffer(*_outputs[0], actual_layout); } else { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": realloc output memory. " - << " Current buffer_size=" << max_output_layout_size - << " Requested buffer_size=" << actual_layout.count() << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": realloc output memory. " + << " Current buffer_size=" << max_output_layout_size + << " Requested buffer_size=" << actual_layout.count() << std::endl; _outputs = allocate_outputs(&updated_params); // TODO : need to handle multiple outputs max_output_layout_size = updated_params.output_layouts[0].count(); @@ -285,7 +276,6 @@ void primitive_inst::realloc_if_needed() { void primitive_inst::update_impl() { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); - GPU_DEBUG_GET_INSTANCE(debug_config); auto prev_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; auto extend_to_6d = [this](ov::PartialShape ps) -> std::vector { // For shape_of we extend shape with 1-s to 6d rank to make kernel simpler @@ -321,7 +311,7 @@ void primitive_inst::update_impl() { return seed; }; - auto update_shape_info = [this, extend_to_6d, debug_config, prev_impl_str](const kernel_impl_params& params) { + auto update_shape_info = [this, extend_to_6d, prev_impl_str](const kernel_impl_params& params) { mem_lock lock(_shape_info_memory, _network.get_stream()); size_t offset = 0; for (size_t i = 0; i < _node->get_dependencies().size(); i++) { @@ -337,17 +327,14 @@ void primitive_inst::update_impl() { for (size_t j = 0; j < output_shape.size(); j++) lock[offset++] = static_cast(output_shape[j]); } - GPU_DEBUG_IF(debug_config->verbose >= 4) { - std::stringstream s; - s << "shapes: "; - for (size_t i = 0; i < offset; i++) - s << lock[i] << " "; - GPU_DEBUG_COUT << id() << ": update dynamic impl " << prev_impl_str << " to new shape: " << s.str() << std::endl; - } + std::stringstream s; + s << "shapes: "; + for (size_t i = 0; i < offset; i++) + s << lock[i] << " "; + GPU_DEBUG_TRACE_DETAIL << id() << ": update dynamic impl " << prev_impl_str << " to new shape: " << s.str() << std::endl; }; if (!_node->is_type() && !(_node->is_type() && _node->get_dependencies().empty())) { - GPU_DEBUG_GET_INSTANCE(debug_config); // Update param if fake_alignment is available auto updated_params = _node->type()->get_fake_aligned_params(*_impl_params); auto layout_key = get_layout_key(updated_params); @@ -359,10 +346,7 @@ void primitive_inst::update_impl() { if (has_cached_impl) { _impl = cache.get(layout_key)->clone(); GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true); - - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": get impl from cache " << _impl->get_kernel_name() << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": get impl from cache " << _impl->get_kernel_name() << std::endl; } } if 
(!has_cached_impl) { @@ -402,10 +386,9 @@ void primitive_inst::update_impl() { kernels_cache.reset(); std::lock_guard lock(get_network().get_impl_cache_mutex()); cache.add(layout_key, _impl->clone()); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - auto new_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; - GPU_DEBUG_COUT << id() << ": update impl from " << prev_impl_str << " to " << new_impl_str << std::endl; - } + + auto new_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; + GPU_DEBUG_TRACE_DETAIL << id() << ": update impl from " << prev_impl_str << " to " << new_impl_str << std::endl; } } @@ -436,14 +419,9 @@ event::ptr primitive_inst::execute(const std::vector& events) { subgraph->set_input_data(d.first->id(), actual_mem); } } - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << "[Start] Executing unfused subgraph of " << id() << std::endl; - } - + GPU_DEBUG_TRACE_DETAIL << "[Start] Executing unfused subgraph of " << id() << std::endl; auto outputs = subgraph->execute(events); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << "[End] Finished executing unfused subgraph of " << id() << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << "[End] Finished executing unfused subgraph of " << id() << std::endl; auto last_fd = _impl_params->fused_desc.back(); auto last_prim_id = last_fd.desc->id; @@ -476,28 +454,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } on_execute(); - GPU_DEBUG_IF(debug_config->verbose >= 1) { - std::ostringstream in_addr; - // buffer_ptr() only support usm_memory - for (size_t i = 0; i < this->dependencies().size(); i++) { - auto in_mem = dep_memory_ptr(i); - if (in_mem) { - in_addr << in_mem->buffer_ptr(); - if (i < this->dependencies().size() - 1) { - in_addr << ", "; - } - } - } - auto out_mem = output_memory_ptr(); - auto out_alloc_type = out_mem ? out_mem->get_allocation_type() : allocation_type::unknown; - auto out_ptr = out_mem ? out_mem->buffer_ptr() : nullptr; - auto impl_name = _impl->get_kernel_name(); - - GPU_DEBUG_COUT << id() << ": execute " << impl_name << ". 
Memory type: " - << out_alloc_type << ", in_usm(" - << in_addr.str() << "), out_usm(" - << out_ptr << ")" << std::endl; - } + GPU_DEBUG_TRACE << id() << ": execute " << _impl->get_kernel_name() << std::endl; if (_exec_deps.empty() && dependencies.empty()) { dependencies = events; @@ -695,10 +652,7 @@ void primitive_inst::allocate_internal_buffers(void) { // allocate intermediate memory for the updated layout of buffer std::vector intermediates_memory; for (auto layout : ibuf_layouts) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl; - } + GPU_DEBUG_LOG << "[" << _node->id() << ": internal buf]" << std::endl; auto alloc_type = allocation_type::unknown; if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) { alloc_type = engine.get_preferred_memory_allocation_type(); @@ -719,8 +673,6 @@ event::ptr primitive_inst::update_weights() { if (!weightable_node) return nullptr; - GPU_DEBUG_GET_INSTANCE(debug_config); - auto& weights_params = _impl->_weights_reorder_params; bool requires_reorder = weights_params.engine != kernel_selector::GenericKernelParams::Engine::NONE && (!_impl_params->reordered_weights || _impl_params->reordered_weights->get_layout() != from_weights_tensor(weights_params.dest)); @@ -744,17 +696,13 @@ event::ptr primitive_inst::update_weights() { if (layout_key != "") { auto& cache = get_network().get_in_mem_kernels_cache(); if (cache.has(layout_key)) { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": reorder weights (cached) from " << original_layout.to_short_string() - << " to " << expected_layout.to_short_string() << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": reorder weights (cached) from " << original_layout.to_short_string() + << " to " << expected_layout.to_short_string() << std::endl; GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true); kernel = cache.get(layout_key); } else { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": reorder weights from " << original_layout.to_short_string() - << " to " << expected_layout.to_short_string() << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": reorder weights from " << original_layout.to_short_string() + << " to " << expected_layout.to_short_string() << std::endl; auto& kernels_cache = get_network().get_kernels_cache(); auto kernel_id = kernels_cache.set_kernel_source(weights_params.clKernel->code.kernelString, false); kernels_cache.compile(); @@ -768,9 +716,7 @@ event::ptr primitive_inst::update_weights() { bool can_reuse = _impl_params->reordered_weights != nullptr && _impl_params->reordered_weights->size() <= expected_layout.bytes_count(); if (can_reuse) { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": reuse weights memory" << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": reuse weights memory" << std::endl; _impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout); } else { auto alloc_type = engine.get_preferred_memory_allocation_type(); @@ -783,6 +729,7 @@ event::ptr primitive_inst::update_weights() { stream.set_arguments(*kernel, weights_params.clKernel->params, args); auto ev = stream.enqueue_kernel(*kernel, weights_params.clKernel->params, args, {}, true); + GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dump_profiling_data.empty()) { stream.wait_for_events({ev}); } @@ -851,15 +798,12 @@ memory::ptr 
primitive_inst::allocate_output(engine& _engine, memory_pool& pool, bool is_cpu = _node.get_selected_impl() ? _node.get_selected_impl()->is_cpu() : false; auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) || !_engine.supports_allocation(allocation_type::usm_device); - GPU_DEBUG_GET_INSTANCE(debug_config); const auto& lockable_mem_type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d()); const auto& alloc_type = use_lockable_memory ? lockable_mem_type : usm_device_allocatable ? allocation_type::usm_device : lockable_mem_type; if ((is_internal && (_node.can_be_optimized() || _node.is_type())) || (memory_reuse_by_user == false)) { - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << _node.id() << ": output]" << std::endl; - } + GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl; return get_memory_from_pool(_engine, layout, _node.id(), @@ -868,21 +812,15 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool, false); } else if (is_internal && _node.is_output() && _node.is_type() && _engine.supports_allocation(allocation_type::usm_device) && usm_device_allocatable) { - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << _node.id() << ": output]" << std::endl; - } + GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl; return _engine.allocate_memory(layout, allocation_type::usm_device, false); } else if (is_internal && !_node.is_output() && _node.is_type()) { // Skip memory reset for input_layout primitives, since data will be copied from cldnn::data primitive // or just reuse primitive's memory - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << _node.id() << ": constant]" << std::endl; - } + GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl; return _engine.allocate_memory(layout, alloc_type, false); } else if (is_internal || (!_node.can_share_buffer()) || _node.can_be_optimized() || _node.is_output()) { - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << _node.id() << ": output]" << std::endl; - } + GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl; return _engine.allocate_memory(layout, alloc_type); } else { return get_memory_from_pool(_engine, @@ -940,10 +878,7 @@ std::string primitive_inst::generic_to_string(program_node const& node, const ch } cldnn::network::ptr primitive_inst::get_unfused_subgraph() { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": Use unfused subgraph due to unexpected fusions\n"; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": Use unfused subgraph due to unexpected fusions\n"; if (!_unfused_subgraph) { topology t; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 5d13b23bbef..8ac2ad8f073 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -719,17 +719,12 @@ void program::transfer_memory_to_device() { if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << data_node.id() << ": constant]" << std::endl; - } + GPU_DEBUG_LOG << "[" << data_node.id() << ": constant]" << std::endl; // Allocate and transfer memory auto device_mem = mem.get_engine()->allocate_memory(data_node_layout, allocation_type::usm_device, false); 
device_mem->copy_from(get_stream(), mem); data_node.attach_memory(device_mem); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << data_node.id() << ": constant]" << std::endl; - } + GPU_DEBUG_LOG << "[" << data_node.id() << ": constant]" << std::endl; const_cast(data_node.get_primitive()->mem).reset(); // TODO: Do we need finish call here? Maybe call it in network::execute() ? get_stream().finish(); @@ -1656,7 +1651,6 @@ std::pair program::get_estimated_device_mem_usage() { }); auto& engine = get_engine(); int64_t host_alloc = 0; - GPU_DEBUG_GET_INSTANCE(debug_config); // just to prevent the memories from being freed during allocation std::unordered_set allocated_mem_ptrs; for (const auto& node : nodes_to_allocate) { @@ -1676,10 +1670,8 @@ std::pair program::get_estimated_device_mem_usage() { if (engine.get_device_info().dev_type == cldnn::device_type::integrated_gpu) total_host_alloc_size += engine.get_used_device_memory(allocation_type::usm_device); if ((cur_vmem != -1 && total_host_alloc_size > cur_vmem * 0.5) || (total_host_alloc_size >= max_global_mem_size)) { - GPU_DEBUG_IF(debug_config->verbose >= 1) { - GPU_DEBUG_COUT << "Estimated host mem usage calculated with default base batch size(16) exceeds the available memory (" - << cur_vmem << ")" << std::endl; - } + GPU_DEBUG_INFO << "Estimated host mem usage calculated with default base batch size(16) exceeds the available memory (" + << cur_vmem << ")" << std::endl; return {-1L, -1L}; } #endif diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index d658aa38f71..62a1b53f24f 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -246,23 +246,18 @@ bool program_node::is_detached(bool whole_branch) { } layout program_node::calc_output_layout() const { - GPU_DEBUG_GET_INSTANCE(debug_config); bool allow_new_shape_infer = get_program().get_options().get()->enabled(); if (allow_new_shape_infer) { auto out_layouts = type()->calc_output_layouts(*this, *get_kernel_impl_params()); if (!out_layouts.empty()) { - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": calc_output_layout(new):" << out_layouts[0] << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": calc_output_layout(new):" << out_layouts[0] << std::endl; return out_layouts[0]; } } auto res = type()->calc_output_layout(*this, *get_kernel_impl_params()); - GPU_DEBUG_IF(debug_config->verbose >= 4) { - GPU_DEBUG_COUT << id() << ": calc_output_layout:" << res << std::endl; - } + GPU_DEBUG_TRACE_DETAIL << id() << ": calc_output_layout:" << res << std::endl; return res; } @@ -528,8 +523,6 @@ bool program_node::has_out_scales(const std::shared_ptr& a dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const std::shared_ptr& attr, bool& optimization_is_completed) { - GPU_DEBUG_GET_INSTANCE(debug_config); - // Create new dnnl::post_ops object which will be filled inside the optimization process dnnl::post_ops optimized_p_ops; @@ -673,22 +666,18 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const int64_t prev_post_op_idx = 0; bool optimization_done = false; - GPU_DEBUG_IF(debug_config->verbose >= 3) { - GPU_DEBUG_COUT << "================================================" << std::endl; - GPU_DEBUG_COUT << " " << id() << ", num of post_ops " << p_ops.len() << std::endl; - for (size_t i = 0; i < cur_post_ops.size(); i++) - GPU_DEBUG_COUT << " " << i << ": " << 
cur_post_ops[i].op_type << std::endl; - } + GPU_DEBUG_TRACE << "================================================" << std::endl; + GPU_DEBUG_TRACE << " " << id() << ", num of post_ops " << p_ops.len() << std::endl; + for (size_t i = 0; i < cur_post_ops.size(); i++) + GPU_DEBUG_TRACE << " " << i << ": " << cur_post_ops[i].op_type << std::endl; remove_optimized_prefix(cur_post_ops); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - GPU_DEBUG_COUT << "remove optimized prefix ------------------------" << std::endl; - GPU_DEBUG_COUT << " " << id() << ", num of post_ops " << p_ops.len() << std::endl; - for (size_t i = 0; i < cur_post_ops.size(); i++) - GPU_DEBUG_COUT << " " << i << ": " << cur_post_ops[i].op_type << std::endl; - GPU_DEBUG_COUT << "----------------------------------->>>>>>>>>>>>>" << std::endl; - } + GPU_DEBUG_TRACE << "remove optimized prefix ------------------------" << std::endl; + GPU_DEBUG_TRACE << " " << id() << ", num of post_ops " << p_ops.len() << std::endl; + for (size_t i = 0; i < cur_post_ops.size(); i++) + GPU_DEBUG_TRACE << " " << i << ": " << cur_post_ops[i].op_type << std::endl; + GPU_DEBUG_TRACE << "----------------------------------->>>>>>>>>>>>>" << std::endl; // Get post-ops size for current node int64_t post_ops_size = cur_post_ops.size(); @@ -710,8 +699,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const auto cur_type = cur_post_ops[cur_post_op_idx].op_type; auto prev_type = cur_post_ops[prev_post_op_idx].op_type; - GPU_DEBUG_IF(debug_config->verbose >= 3) - GPU_DEBUG_COUT << "before prev_post_op_idx: " << prev_post_op_idx << ", cur_post_op_idx: " << cur_post_op_idx << std::endl; + GPU_DEBUG_TRACE << "before prev_post_op_idx: " << prev_post_op_idx << ", cur_post_op_idx: " << cur_post_op_idx << std::endl; // Ignore optimized operations for "previous" operation in our operation pair while (type_is_any_optimized(prev_type) && prev_post_op_idx < post_ops_size - 1) { @@ -728,8 +716,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const cur_type = cur_post_ops[cur_post_op_idx].op_type; } - GPU_DEBUG_IF(debug_config->verbose >= 3) - GPU_DEBUG_COUT << "after prev_post_op_idx: " << prev_post_op_idx << ", cur_post_op_idx: " << cur_post_op_idx << std::endl; + GPU_DEBUG_TRACE << "after prev_post_op_idx: " << prev_post_op_idx << ", cur_post_op_idx: " << cur_post_op_idx << std::endl; auto cur_idx = static_cast(has_out_scales(attr) ? (cur_post_op_idx >= 1 ? cur_post_op_idx - 1 : 0) : cur_post_op_idx); auto prev_idx = static_cast(has_out_scales(attr) ? (prev_post_op_idx >= 1 ? 
prev_post_op_idx - 1 : 0) : prev_post_op_idx); @@ -766,10 +753,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const bool cur_ops_pair_is_optimized = false; - GPU_DEBUG_IF(debug_config->verbose >= 3) { - GPU_DEBUG_COUT << "prev_idx: " << prev_idx << " " << prev_type - << ", cur_idx: " << cur_idx << " " << cur_type << std::endl; - } + GPU_DEBUG_TRACE << "prev_idx: " << prev_idx << " " << prev_type + << ", cur_idx: " << cur_idx << " " << cur_type << std::endl; if (can_try_optimize) { if (eltw_and_eltw) { @@ -1005,12 +990,10 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const remove_optimized_prefix(cur_post_ops); } - GPU_DEBUG_IF(debug_config->verbose >= 3) { - GPU_DEBUG_COUT << ">>>>>>>>>>>>>-----------------------------------" << std::endl; - for (size_t i = 0; i < cur_post_ops.size(); i++) - GPU_DEBUG_COUT << " " << i << ": " << cur_post_ops[i].op_type << std::endl; - GPU_DEBUG_COUT << "------------------------------------------------" << std::endl; - } + GPU_DEBUG_TRACE << ">>>>>>>>>>>>>-----------------------------------" << std::endl; + for (size_t i = 0; i < cur_post_ops.size(); i++) + GPU_DEBUG_TRACE << " " << i << ": " << cur_post_ops[i].op_type << std::endl; + GPU_DEBUG_TRACE << "------------------------------------------------" << std::endl; add_onednn_fused_primitives(cur_post_ops); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp index b86c91cc657..9e83418934a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector.cpp @@ -107,12 +107,8 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, } } catch (std::runtime_error& ex) { // we have to handle it in order to avoid exception in KernelSelector as much we can - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; - GPU_DEBUG_COUT << "layerID: " << params.layerID << " kenrel: " - << kernelName << " - " << ex.what() << std::endl; - } + kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; + GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; } } @@ -202,12 +198,8 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, } } catch (std::runtime_error& ex) { // we have to handle it in order to avoid exception in KernelSelector as much we can - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; - GPU_DEBUG_COUT << "layerID: " << params.layerID << " kenrel: " - << kernelName << " - " << ex.what() << std::endl; - } + kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; + GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; } } } @@ -231,12 +223,8 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, } } catch (std::runtime_error& ex) { // we have to handle it in order to avoid exception in KernelSelector as much we can - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 3) { - kernelName = (implementation != nullptr)? 
implementation->GetName() : "[impl is null]"; - GPU_DEBUG_COUT << "layerID: " << params.layerID << " kenrel: " - << kernelName << " - " << ex.what() << std::endl; - } + kernelName = (implementation != nullptr)? implementation->GetName() : "[impl is null]"; + GPU_DEBUG_TRACE << "layerID: " << params.layerID << " kenrel: " << kernelName << " - " << ex.what() << std::endl; } } } diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp index 713b325d6fc..84c3c5660b8 100644 --- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp @@ -493,7 +493,7 @@ void InferRequest::enqueue() { // If dump layers path is set, only runs first inference. GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { - GPU_DEBUG_COUT << "Only run first inference to dump layers." << std::endl; + GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; exit(0); } } @@ -760,10 +760,7 @@ void InferRequest::allocate_inputs() { OPENVINO_ASSERT(litr != inputLayouts.end(), "[GPU] Input layout for ", name, " is not found"); const auto input_layout = litr->second; - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << name << ": input blob]" << std::endl; - } + GPU_DEBUG_LOG << "[" << name << ": input blob]" << std::endl; if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { TensorDesc desc_fp32 = desc; desc_fp32.setPrecision(Precision::FP32); @@ -796,10 +793,7 @@ void InferRequest::allocate_outputs() { if (output_layout.is_static()) desc.setDims(m_graph->GetOutputSize(no.first)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl; - } + GPU_DEBUG_LOG << "[" << no.first << ": output blob]" << std::endl; outputsMap[no.first] = outputID; if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16 || diff --git a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp b/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp index 58bd488c2af..9d1cd0a2283 100644 --- a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp @@ -769,7 +769,7 @@ void InferRequestLegacy::enqueue() { // If dump layers path is set, only runs first inference. GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) { - GPU_DEBUG_COUT << "Only run first inference to dump layers." << std::endl; + GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; exit(0); } } @@ -1031,10 +1031,7 @@ void InferRequestLegacy::allocate_inputs() { IE_THROW() << "Input layout for " << name << " is not found"; } - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << name << ": input blob]" << std::endl; - } + GPU_DEBUG_LOG << "[" << name << ": input blob]" << std::endl; if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { TensorDesc desc_fp32 = desc; desc_fp32.setPrecision(Precision::FP32); @@ -1088,10 +1085,7 @@ void InferRequestLegacy::allocate_outputs() { // Can be removed once 76176 is resolved. 
desc.setDims(m_graph->GetOutputSize(no.first)); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl; - } + GPU_DEBUG_LOG << "[" << no.first << ": output blob]" << std::endl; outputsMap[no.first] = outputID; if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16 || diff --git a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp index 4d05d455743..b023c191c9e 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp @@ -201,10 +201,7 @@ void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std:: p.primitive_ids[initialconstPrimID] = constPrimID; p.profiling_ids.push_back(initialconstPrimID); } else { - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << initialconstPrimID << ": constant]" << std::endl; - } + GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant]" << std::endl; cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false); auto& stream = p.GetEngine().get_program_stream(); cldnn::mem_lock lock{mem, stream}; diff --git a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp index d0581a2970b..21fc053f7e4 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp @@ -73,10 +73,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(1).size()), tensor_from_dims(op->get_output_shape(1))); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - } + GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout)); cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write"; diff --git a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp index 7dd7ca8cecb..c583254513d 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp @@ -93,7 +93,6 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt auto outputIndices = op->get_output_partial_shape(0)[0].get_length(); std::vector shared_memory; - GPU_DEBUG_GET_INSTANCE(debug_config); switch (num_outputs) { case 3: { auto mutable_precision_second = op->get_output_element_type(2); @@ -105,9 +104,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt cldnn::format::get_default_format(op->get_output_shape(2).size()), tensor_from_dims(op->get_output_shape(2))); - GPU_DEBUG_IF(debug_config->verbose >= 2) { - GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; - } + GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond)); cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; @@ -123,9 +120,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt cldnn::format::bfyx, 
         cldnn::tensor(static_cast(outputIndices), 3, 1, 1));
-        GPU_DEBUG_IF(debug_config->verbose >= 2) {
-            GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
-        }
+        GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
         shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
         cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
diff --git a/src/plugins/intel_gpu/src/plugin/ops/proposal.cpp b/src/plugins/intel_gpu/src/plugin/ops/proposal.cpp
index 99d28e412f6..93bade2040a 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/proposal.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/proposal.cpp
@@ -64,10 +64,7 @@ static void CreateProposalOp(Program& p, const std::shared_ptrget_output_shape(1).size()),
                                     tensor_from_dims(op->get_output_shape(1)));
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->verbose >= 2) {
-            GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
-        }
+        GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
         auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
         cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
diff --git a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp
index 618013160d7..f7132c83007 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp
@@ -65,10 +65,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr
                                         cldnn::format::get_default_format(op->get_output_shape(1).size()),
                                         tensor_from_dims(op->get_output_shape(1)));
-            GPU_DEBUG_GET_INSTANCE(debug_config);
-            GPU_DEBUG_IF(debug_config->verbose >= 2) {
-                GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
-            }
+            GPU_DEBUG_LOG << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
             auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
             cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 84b842e99a9..cb7fa99fa8a 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -686,25 +686,20 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 1) {
-                GPU_DEBUG_COUT << "[GPU_OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl;
-            }
+            GPU_DEBUG_INFO << "[OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl;
             return decltype(ov::optimal_batch_size)::value_type {static_cast(1)};
         }
         std::shared_ptr model;
         try {
             model = model_param->second.as>();
         } catch (...) {
-            IE_THROW() << "[GPU_OPTIMAL_BATCH_SIZE] ov::hint::model should be std::shared_ptr type";
-        }
-        GPU_DEBUG_IF(debug_config->verbose >= 1) {
-            GPU_DEBUG_COUT << "DEVICE_INFO:"
-                           << "gfx_version.major, " << device_info.gfx_ver.major
-                           << "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl;
+            IE_THROW() << "[OPTIMAL_BATCH_SIZE] ov::hint::model should be std::shared_ptr type";
         }
+        GPU_DEBUG_INFO << "DEVICE_INFO:"
+                       << "gfx_version.major, " << device_info.gfx_ver.major
+                       << ", gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl;
         static std::map gen_kbytes_per_bank = {
                 {{12, 0, 0}, 480}, // TGL
                 {{12, 1, 0}, 2048}, // DG1
@@ -722,14 +717,12 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 1) {
-            GPU_DEBUG_COUT << "DEVICE_INFO:"
-                           << "num_slices " << device_info.num_slices
-                           << ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice
-                           << ", num_banks_per_slice " << num_banks_per_slice
-                           << ", gen_kbytes_per_bank : " << kbytes_per_bank
-                           << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl;
-        }
+        GPU_DEBUG_INFO << "DEVICE_INFO:"
+                       << "num_slices " << device_info.num_slices
+                       << ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice
+                       << ", num_banks_per_slice " << num_banks_per_slice
+                       << ", gen_kbytes_per_bank : " << kbytes_per_bank
+                       << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl;
     }
     Config config = _impl->m_configs.GetConfig(device_id);
     auto networkCloned = CloneAndTransformNetwork(CNNNetwork(model), config);
@@ -744,11 +737,9 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 1) {
-            GPU_DEBUG_COUT << memPressure.max_mem_tolerance << std::endl;
-            GPU_DEBUG_COUT << "MAX_BATCH: " << max_batch_size << std::endl;
-            GPU_DEBUG_COUT << "ACTUAL OPTIMAL BATCH: " << batch << std::endl;
-        }
+        GPU_DEBUG_INFO << memPressure.max_mem_tolerance << std::endl;
+        GPU_DEBUG_INFO << "MAX_BATCH: " << max_batch_size << std::endl;
+        GPU_DEBUG_INFO << "ACTUAL OPTIMAL BATCH: " << batch << std::endl;
         return decltype(ov::optimal_batch_size)::value_type {batch};
     } else if (name == ov::device::uuid) {
         ov::device::UUID uuid = {};
@@ -815,17 +806,13 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 2) {
-            GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] available memory is " << available_device_mem
-                           << " (occupied: " << occupied_device_mem << ")" << std::endl;
-        }
+        GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is " << available_device_mem
+                      << " (occupied: " << occupied_device_mem << ")" << std::endl;
         int64_t max_batch_size = 1;
         if (options.find(ov::hint::model.name()) == options.end()) {
-            GPU_DEBUG_IF(debug_config->verbose >= 1) {
-                GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] MODELS_PTR is not set: return 1" << std::endl;
-            }
+            GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] MODELS_PTR is not set: return 1" << std::endl;
             return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)};
         }
@@ -849,17 +836,13 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 2) {
-            GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] n_streams : " << n_streams << std::endl;
-        }
+        GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] n_streams : " << n_streams << std::endl;
        auto available_device_mem_it = options.find(ov::intel_gpu::hint::available_device_mem.name());
        if (available_device_mem_it != options.end()) {
            if (available_device_mem_it->second.is()) {
                available_device_mem = std::min(static_cast(available_device_mem), available_device_mem_it->second.as());
-                GPU_DEBUG_IF(debug_config->verbose >= 2) {
-                    GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] available memory is reset by user " << available_device_mem << std::endl;
-                }
+                GPU_DEBUG_LOG << "[GPU_MAX_BATCH_SIZE] available memory is reset by user " << available_device_mem << std::endl;
            } else {
                IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: ov::intel_gpu::hint::available_device_mem should be int64_t type";
            }
@@ -910,9 +893,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapget_partial_shape();
            // currently no plugin support batched execution for dynamic networks
            if (shape.is_dynamic()) {
-                GPU_DEBUG_IF(debug_config->verbose >= 2) {
-                    GPU_DEBUG_COUT << "[MAX_BATCH_SIZE] does not support dynamic networks" << std::endl;
-                }
+                GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] does not support dynamic networks" << std::endl;
                return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)};
            }
@@ -921,10 +902,8 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapoutput(0));
-                    GPU_DEBUG_IF(debug_config->verbose >= 2) {
-                        GPU_DEBUG_COUT << "[MAX_BATCH_SIZE] detected batched input " << batched_input_id
-                                       << "[" << s << "]" << std::endl;
-                    }
+                    GPU_DEBUG_LOG << "[MAX_BATCH_SIZE] detected batched input " << batched_input_id
+                                  << "[" << s << "]" << std::endl;
                    batched_inputs.insert(std::make_pair(batched_input_id, s));
                }
            }
@@ -932,9 +911,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 2) {
-                GPU_DEBUG_COUT << "[MAX_BATCH_SIZE] MAX_BATCH_SIZE supports only networks with inputs/outputs featuring batched dim." << std::endl;
            return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)};
        }
@@ -944,9 +921,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapverbose >= 1) {
-                GPU_DEBUG_COUT << "[MAX_BATCH_SIZE] Error at reshape to " << base_batch_size << std::endl;
-            }
+            GPU_DEBUG_INFO << "[MAX_BATCH_SIZE] Error at reshape to " << base_batch_size << std::endl;
            return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)};
        }
@@ -962,15 +937,11 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map(static_cast(available_device_mem) - device_memory_usage.first));
            int64_t mem_per_batch = std::max(static_cast(1L), (device_memory_usage.second / static_cast(base_batch_size)));
            max_batch_size = mem_for_general / (mem_per_batch * static_cast(n_streams));
-            GPU_DEBUG_IF(debug_config->verbose >= 1) {
-                GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] Base batch size: " << base_batch_size << std::endl;
-                GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] Const mem usage: " << device_memory_usage.first << std::endl;
-                GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] General mem usage: " << device_memory_usage.second << std::endl;
-            }
+            GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Base batch size: " << base_batch_size << std::endl;
+            GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Const mem usage: " << device_memory_usage.first << std::endl;
+            GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] General mem usage: " << device_memory_usage.second << std::endl;
        } catch (std::exception& e) {
-            GPU_DEBUG_IF(debug_config->verbose >= 1) {
-                GPU_DEBUG_COUT << "[GPU_MAX_BATCH_SIZE] Failed in reshape or build program " << e.what() << std::endl;
-            }
+            GPU_DEBUG_INFO << "[GPU_MAX_BATCH_SIZE] Failed in reshape or build program " << e.what() << std::endl;
        }
        return decltype(ov::max_batch_size)::value_type {static_cast(max_batch_size)};
    } else if (isModelCachingEnabled && name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) {
diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp
index 930c8bf6092..a726dc10db1 100644
--- a/src/plugins/intel_gpu/src/plugin/program.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program.cpp
@@ -375,11 +375,8 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st
 void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive");
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->verbose >= 2) {
-        GPU_DEBUG_COUT << "Process " << "op::v" << op->get_type_info().version << "::" << op->get_type_name() << " operation "
-                       << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl;
-    }
+    GPU_DEBUG_LOG << "Process " << "op::v" << op->get_type_info().version << "::" << op->get_type_name() << " operation "
+                  << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl;
     bool is_created = false;
     const ngraph::NodeTypeInfo* op_type_info = &op->get_type_info();
diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
index d50f090327e..abb45e877b2 100644
--- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
+++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp
@@ -17,6 +17,8 @@ const char *debug_configuration::prefix = "GPU_Debug: ";
 #ifdef GPU_DEBUG_CONFIG
+#define GPU_DEBUG_COUT std::cout << cldnn::debug_configuration::prefix
+
 template
 void print_option(std::string option_name, T option_value) {
     GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl;
diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp
index f3e7304bd1b..d66bedbab82 100644
--- a/src/plugins/intel_gpu/src/runtime/engine.cpp
+++ b/src/plugins/intel_gpu/src/runtime/engine.cpp
@@ -265,11 +265,8 @@ std::shared_ptr engine::create(engine_types engine_type,
     default: throw std::runtime_error("Invalid engine type");
     }
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->verbose >= 1) {
-        const auto& info = device->get_info();
-        GPU_DEBUG_COUT << "Selected Device: " << info.dev_name << std::endl;
-    }
+    const auto& info = device->get_info();
+    GPU_DEBUG_INFO << "Selected Device: " << info.dev_name << std::endl;
     return ret;
 }
diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
index 4c630ee07ba..12cabd542f4 100644
--- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
+++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp
@@ -299,12 +299,9 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program&
             dump_file << "*/\n";
         }
         if (!err_log.empty()) {
-            GPU_DEBUG_GET_INSTANCE(debug_config);
-            GPU_DEBUG_IF(debug_config->verbose) {
-                std::cout << "-------- OpenCL build error" << std::endl;
-                std::cout << err_log << std::endl;
-                std::cout << "-------- End of OpenCL build error" << std::endl;
-            }
+            GPU_DEBUG_INFO << "-------- OpenCL build error" << std::endl;
+            GPU_DEBUG_INFO << err_log << std::endl;
+            GPU_DEBUG_INFO << "-------- End of OpenCL build error" << std::endl;
             std::stringstream err_ss(err_log);
             std::string line;
             int cnt = 0;
diff --git a/src/plugins/intel_gpu/src/runtime/memory.cpp b/src/plugins/intel_gpu/src/runtime/memory.cpp
index 63511ce9228..e097946f204 100644
--- a/src/plugins/intel_gpu/src/runtime/memory.cpp
+++ b/src/plugins/intel_gpu/src/runtime/memory.cpp
@@ -21,12 +21,9 @@ memory::memory(engine* engine, const layout& layout, allocation_type type, bool
     : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {
     if (!_reused && _engine) {
         _engine->add_memory_used(_bytes_count, type);
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->verbose >= 1) {
-            GPU_DEBUG_COUT << "Allocate " << _bytes_count << " bytes of " << type << " allocation type"
-                           << " (current=" << _engine->get_used_device_memory(type) << ";"
-                           << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl;
-        }
+        GPU_DEBUG_LOG << "Allocate " << _bytes_count << " bytes of " << type << " allocation type"
+                      << " (current=" << _engine->get_used_device_memory(type) << ";"
+                      << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl;
     }
 }
@@ -35,12 +32,9 @@ memory::~memory() {
     try {
         _engine->subtract_memory_used(_bytes_count, _type);
     } catch (...) {}
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->verbose >= 1) {
-            GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
-                           << " (current=" << _engine->get_used_device_memory(_type) << ";"
-                           << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl;
-        }
+        GPU_DEBUG_LOG << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
+                      << " (current=" << _engine->get_used_device_memory(_type) << ";"
+                      << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl;
     }
 }
diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
index 5bf970f22c7..04bb08888f1 100644
--- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
+++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
@@ -137,10 +137,7 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout,
             ++it;
         }
     }
-    GPU_DEBUG_GET_INSTANCE(debug_config);
-    GPU_DEBUG_IF(debug_config->verbose >= 2) {
-        GPU_DEBUG_COUT << "[" << id << ": output]" << std::endl;
-    }
+    GPU_DEBUG_LOG << "[" << id << ": output]" << std::endl;
     // didn't find anything for you? create new resource
     auto mem = alloc_memory(layout, type);
     {
@@ -179,10 +176,7 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout,
                 memory_record({{id, network_id}}, mem, network_id, type));
             return mem;
         }
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->verbose >= 2) {
-            GPU_DEBUG_COUT << "[" << id << ": output]" << std::endl;
-        }
+        GPU_DEBUG_LOG << "[" << id << ": output]" << std::endl;
         auto mem = alloc_memory(layout, type);
         std::list list = {memory_record({{id, network_id}}, mem, network_id, type)};
         _padded_pool.emplace(layout, std::move(list));
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp
index d2dca452ca4..a501e935e7c 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp
@@ -76,10 +76,7 @@ std::vector command_queues_builder::get_properties(const cl
         using cmp_t = std::common_type::type>::type;
         if (!(static_cast(queue_properties) & static_cast(cl::QueueProperties::OutOfOrder))) {
             out_of_order = false;
-            GPU_DEBUG_GET_INSTANCE(debug_config);
-            GPU_DEBUG_IF(debug_config->verbose >= 1) {
-                GPU_DEBUG_COUT << "Requested out-of-order queue is not supported by current device. Use in-order instead";
-            }
+            GPU_DEBUG_INFO << "Requested out-of-order queue is not supported by the current device. Use in-order instead\n";
         }
     }
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
index 872a715838f..52e1b1b5c42 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp
@@ -180,10 +180,10 @@ bool ocl_events::get_profiling_info_impl(std::listprint_multi_kernel_perf) {
             if (period.stage == instrumentation::profiling_stage::executing) {
-                GPU_DEBUG_COUT << "Multi-kernel time: ";
+                GPU_DEBUG_TRACE << "Multi-kernel time: ";
                 for (auto& duration : all_durations[period.stage])
-                    std::cout << " " << (duration.second - duration.first) / 1000;
-                std::cout << " Total " << sum / 1000 << std::endl;
+                    GPU_DEBUG_TRACE << " " << (duration.second - duration.first) / 1000;
+                GPU_DEBUG_TRACE << " Total " << sum / 1000 << std::endl;
             }
         }
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
index 0963e1b5f4f..7238ec822dc 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
@@ -338,10 +338,7 @@ void* gpu_usm::lock(const stream& stream, mem_lock_type type) {
         if (type != mem_lock_type::read) {
             throw std::runtime_error("Unable to lock allocation_type::usm_device with write lock_type.");
         }
-        GPU_DEBUG_GET_INSTANCE(debug_config);
-        GPU_DEBUG_IF(debug_config->verbose >= 2) {
-            GPU_DEBUG_COUT << "Copy usm_device buffer to host buffer." << std::endl;
-        }
+        GPU_DEBUG_LOG << "Copy usm_device buffer to host buffer." << std::endl;
         _host_buffer.allocateHost(_bytes_count);
         cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), _host_buffer.get(), _buffer.get(), _bytes_count, CL_TRUE);
         _mapped_ptr = _host_buffer.get();
diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
index 78fd6a28894..de03847b82f 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp
@@ -6,6 +6,7 @@
 #include "ocl_event.hpp"
 #include "ocl_user_event.hpp"
 #include "ocl_command_queues_builder.hpp"
+#include "intel_gpu/runtime/debug_configuration.hpp"
 #include "ocl_kernel.hpp"
 #include "ocl_common.hpp"
@@ -49,6 +50,27 @@ inline cl::NDRange toNDRange(const std::vector& v) {
     }
 }
+cl_int set_kernel_arg(ocl_kernel_type& kernel, uint32_t idx, cldnn::memory::cptr mem) {
+    if (!mem)
+        return CL_INVALID_ARG_VALUE;
+
+    if (mem->get_layout().format.is_image_2d()) {
+        auto buf = std::dynamic_pointer_cast(mem)->get_buffer();
+        GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel.get() << " set arg (image) " << idx << " mem: " << buf.get() << " size: " << mem->size() << std::endl;
+        return kernel.setArg(idx, buf);
+    } else if (memory_capabilities::is_usm_type(mem->get_allocation_type())) {
+        auto buf = std::dynamic_pointer_cast(mem)->get_buffer();
+        GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel.get() << " set arg (usm) " << idx << " mem: " << buf.get() << " size: " << mem->size() << std::endl;
+        return kernel.setArgUsm(idx, buf);
+    } else {
+        auto buf = std::dynamic_pointer_cast(mem)->get_buffer();
+        GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel.get() << " set arg (buffer) " << idx << " mem: " << buf.get() << " size: " << mem->size() << std::endl;
+        return kernel.setArg(idx, buf);
+    }
+
+    return CL_INVALID_ARG_VALUE;
+}
+
 void set_arguments_impl(ocl_kernel_type& kernel,
                         const arguments_desc& args,
                         const kernel_arguments_data& data) {
@@ -59,121 +81,44 @@ void set_arguments_impl(ocl_kernel_type& kernel,
         switch (args[i].t) {
             case args_t::INPUT:
                 if (args[i].index < data.inputs.size() && data.inputs[args[i].index]) {
-                    const auto& input_mem = data.inputs[args[i].index];
-                    if (input_mem) {
-                        if (input_mem->get_layout().format.is_image_2d())
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                        else if (memory_capabilities::is_usm_type(input_mem->get_allocation_type()))
-                            status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                        else
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                    }
+                    status = set_kernel_arg(kernel, i, data.inputs[args[i].index]);
                 }
                 break;
            case args_t::INPUT_OF_FUSED_PRIMITIVE:
                if (args[i].index < data.fused_op_inputs.size() && data.fused_op_inputs[args[i].index]) {
-                    const auto& input_mem = data.fused_op_inputs[args[i].index];
-                    if (input_mem) {
-                        if (memory_capabilities::is_usm_type(input_mem->get_allocation_type()))
-                            status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                        else
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                    }
+                    status = set_kernel_arg(kernel, i, data.fused_op_inputs[args[i].index]);
                }
                break;
            case args_t::INTERNAL_BUFFER:
                if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index]) {
-                    const auto& input_mem = data.intermediates[args[i].index];
-                    if (input_mem) {
-                        if (memory_capabilities::is_usm_type(input_mem->get_allocation_type()))
-                            status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                        else
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer());
-                    }
+                    status = set_kernel_arg(kernel, i, data.intermediates[args[i].index]);
                }
                break;
            case args_t::OUTPUT:
                if (args[i].index < data.outputs.size() && data.outputs[args[i].index]) {
-                    const auto& output_mem = data.outputs[args[i].index];
-                    if (output_mem) {
-                        if (output_mem->get_layout().format.is_image_2d())
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(output_mem)->get_buffer());
-                        else if (memory_capabilities::is_usm_type(output_mem->get_allocation_type()))
-                            status = kernel.setArgUsm(i, std::dynamic_pointer_cast(output_mem)->get_buffer());
-                        else
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(output_mem)->get_buffer());
-                    }
+                    status = set_kernel_arg(kernel, i, data.outputs[args[i].index]);
                }
                break;
            case args_t::WEIGHTS:
-                if (data.weights) {
-                    if (data.weights->get_layout().format.is_image_2d())
-                        status = kernel.setArg(i, std::dynamic_pointer_cast(data.weights)->get_buffer());
-                    else if (memory_capabilities::is_usm_type(data.weights->get_allocation_type()))
-                        status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.weights)->get_buffer());
-                    else
-                        status = kernel.setArg(i, std::dynamic_pointer_cast(data.weights)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.weights);
                break;
            case args_t::BIAS:
-                if (data.bias) {
-                    if (memory_capabilities::is_usm_type(data.bias->get_allocation_type()))
-                        status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.bias)->get_buffer());
-                    else
-                        status = kernel.setArg(i, std::dynamic_pointer_cast(data.bias)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.bias);
                break;
            case args_t::WEIGHTS_ZERO_POINTS:
-                if (data.weights_zero_points) {
-                    if (memory_capabilities::is_usm_type(data.weights_zero_points->get_allocation_type()))
-                        status = kernel.setArgUsm(
-                            i,
-                            std::dynamic_pointer_cast(data.weights_zero_points)->get_buffer());
-                    else
-                        status = kernel.setArg(
-                            i,
-                            std::dynamic_pointer_cast(data.weights_zero_points)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.weights_zero_points);
                break;
            case args_t::ACTIVATIONS_ZERO_POINTS:
-                if (data.activations_zero_points) {
-                    if (memory_capabilities::is_usm_type(data.activations_zero_points->get_allocation_type()))
-                        status = kernel.setArgUsm(
-                            i,
-                            std::dynamic_pointer_cast(data.activations_zero_points)->get_buffer());
-                    else
-                        status = kernel.setArg(
-                            i,
-                            std::dynamic_pointer_cast(data.activations_zero_points)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.activations_zero_points);
                break;
            case args_t::COMPENSATION:
-                if (data.compensation) {
-                    if (memory_capabilities::is_usm_type(data.compensation->get_allocation_type()))
-                        status = kernel.setArgUsm(
-                            i,
-                            std::dynamic_pointer_cast(data.compensation)->get_buffer());
-                    else
-                        status = kernel.setArg(
-                            i,
-                            std::dynamic_pointer_cast(data.compensation)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.compensation);
                break;
            case args_t::SCALE_TABLE:
-                if (data.scale_table) {
-                    if (memory_capabilities::is_usm_type(data.scale_table->get_allocation_type()))
-                        status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.scale_table)->get_buffer());
-                    else
-                        status = kernel.setArg(i, std::dynamic_pointer_cast(data.scale_table)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.scale_table);
                break;
            case args_t::SLOPE:
-                if (data.slope) {
-                    if (memory_capabilities::is_usm_type(data.slope->get_allocation_type()))
-                        status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.slope)->get_buffer());
-                    else
-                        status = kernel.setArg(i, std::dynamic_pointer_cast(data.slope)->get_buffer());
-                }
+                status = set_kernel_arg(kernel, i, data.slope);
                break;
            case args_t::SPLIT:
                status = kernel.setArg(i, data.split);
@@ -217,48 +162,17 @@ void set_arguments_impl(ocl_kernel_type& kernel,
                    }
                }
                break;
-            case args_t::RECURRENT:  // RNN/LSTM/GRU layers
-                if (data.recurrent) {
-                    if (data.recurrent->get_layout().format.is_image_2d())
-                        status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer());
-                    else if (memory_capabilities::is_usm_type(data.recurrent->get_allocation_type()))
-                        status = kernel.setArgUsm(i, dynamic_cast(*data.recurrent).get_buffer());
-                    else
-                        status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer());
-                }
+            case args_t::RECURRENT:
+                status = set_kernel_arg(kernel, i, data.recurrent);
                break;
-            case args_t::HIDDEN:  // RNN/LSTM/GRU layers
-                if (data.hidden) {
-                    if (data.hidden->get_layout().format.is_image_2d())
-                        status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer());
-                    else if (memory_capabilities::is_usm_type(data.hidden->get_allocation_type()))
-                        status = kernel.setArgUsm(i, dynamic_cast(*data.hidden).get_buffer());
-                    else
-                        status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer());
-                }
+            case args_t::HIDDEN:
+                status = set_kernel_arg(kernel, i, data.hidden);
                break;
-            case args_t::CELL:  // LSTMlayers
-                if (data.cell) {
-                    if (data.cell->get_layout().format.is_image_2d())
-                        status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer());
-                    else if (memory_capabilities::is_usm_type(data.cell->get_allocation_type()))
-                        status = kernel.setArgUsm(i, dynamic_cast(*data.cell).get_buffer());
-                    else
-                        status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer());
-                }
+            case args_t::CELL:
+                status = set_kernel_arg(kernel, i, data.cell);
                break;
            case args_t::SHAPE_INFO:
-                if (args[i].index == 0 && data.shape_info) {
-                    const auto& shape_info_mem = data.shape_info;
-                    if (shape_info_mem) {
-                        if (shape_info_mem->get_layout().format.is_image_2d())
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(shape_info_mem)->get_buffer());
-                        else if (memory_capabilities::is_usm_type(shape_info_mem->get_allocation_type()))
-                            status = kernel.setArgUsm(i, std::dynamic_pointer_cast(shape_info_mem)->get_buffer());
-                        else
-                            status = kernel.setArg(i, std::dynamic_pointer_cast(shape_info_mem)->get_buffer());
-                    }
-                }
+                status = set_kernel_arg(kernel, i, data.shape_info);
                break;
            default:
                break;
@@ -359,6 +273,7 @@ void ocl_stream::set_arguments(kernel& kernel, const kernel_arguments_desc& args
     auto& kern = ocl_kernel.get_handle();
     try {
+        GPU_DEBUG_TRACE_DETAIL << "Set arguments for primitive: " << args_desc.layerID << " (" << kern.get() << ")\n";
         set_arguments_impl(kern, args_desc.arguments, args);
     } catch (cl::Error const& err) {
         throw ocl_error(err);