Add new debug config "DisableRuntimeBufferFusing" (#18726)

This commit is contained in:
Taylor Yeonbok Lee 2023-07-24 14:56:21 -07:00 committed by GitHub
parent f70ef8be5b
commit ce729761d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 3 deletions

View File

@ -119,6 +119,7 @@ public:
int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels
int disable_async_compilation; // Disable async compilation int disable_async_compilation; // Disable async compilation
int disable_dynamic_impl; // Disable dynamic implementation int disable_dynamic_impl; // Disable dynamic implementation
int disable_runtime_buffer_fusing; // Disable runtime buffer fusing
std::set<int64_t> dump_iteration; // Dump n-th execution of network. std::set<int64_t> dump_iteration; // Dump n-th execution of network.
static const debug_configuration *get_instance(); static const debug_configuration *get_instance();
bool is_dumped_layer(const std::string& layerName, bool is_output = false) const; bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;

View File

@ -69,6 +69,11 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
bool is_runtime) { bool is_runtime) {
if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph()) if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
return false; return false;
bool do_runtime_buffer_fusing = true;
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
do_runtime_buffer_fusing = false;
}
auto pred_nodes = concat_node.get_dependencies(); auto pred_nodes = concat_node.get_dependencies();
for (auto p : pred_nodes) { for (auto p : pred_nodes) {
// TODO : In dynamic shape only one user is allowed for optimized concat // TODO : In dynamic shape only one user is allowed for optimized concat
@ -79,9 +84,9 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
// for simple patterns where the concat is the only user of all the preds. // for simple patterns where the concat is the only user of all the preds.
// Also, cascaded concat is not handled for dynamic shape for now. // Also, cascaded concat is not handled for dynamic shape for now.
// If we have more flexible exec order handling in the future we'll be able to remove this condition below // If we have more flexible exec order handling in the future we'll be able to remove this condition below
if (p.first->is_dynamic() && p.first->get_users().size() > 1) if (p.first->is_dynamic() && (!do_runtime_buffer_fusing || p.first->get_users().size() > 1))
return false; return false;
if (concat_node.is_dynamic() && !p.first->is_dynamic()) if (concat_node.is_dynamic() && (!do_runtime_buffer_fusing || !p.first->is_dynamic()))
return false; return false;
} }
// if this is called in primitive_inst::execute() and concat is static, that concat should already be optimized in build time, not in runtime. // if this is called in primitive_inst::execute() and concat is static, that concat should already be optimized in build time, not in runtime.

View File

@ -625,6 +625,10 @@ bool primitive_inst::update_impl() {
} }
void primitive_inst::do_runtime_in_place_concat() { void primitive_inst::do_runtime_in_place_concat() {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
return;
}
if (update_shape_done_by_other) if (update_shape_done_by_other)
return; return;
if (get_users().size() != 1) return; if (get_users().size() != 1) return;

View File

@ -133,6 +133,7 @@ static void print_help_messages() {
message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels"); message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels");
message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation"); message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation");
message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation"); message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation");
message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing");
message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space.");
message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in" message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in"
"the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), "
@ -175,7 +176,8 @@ debug_configuration::debug_configuration()
, serialize_compile(0) , serialize_compile(0)
, max_kernels_per_batch(0) , max_kernels_per_batch(0)
, disable_async_compilation(0) , disable_async_compilation(0)
, disable_dynamic_impl(0) { , disable_dynamic_impl(0)
, disable_runtime_buffer_fusing(0) {
#ifdef GPU_DEBUG_CONFIG #ifdef GPU_DEBUG_CONFIG
get_gpu_debug_env_var("Help", help); get_gpu_debug_env_var("Help", help);
get_common_debug_env_var("Verbose", verbose); get_common_debug_env_var("Verbose", verbose);
@ -205,6 +207,7 @@ debug_configuration::debug_configuration()
get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch); get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch);
get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation); get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation);
get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl); get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl);
get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing);
std::string dump_iteration_str; std::string dump_iteration_str;
get_gpu_debug_env_var("DumpIteration", dump_iteration_str); get_gpu_debug_env_var("DumpIteration", dump_iteration_str);
std::string mem_preallocation_params_str; std::string mem_preallocation_params_str;