Add new debug config "DisableRuntimeBufferFusing" (#18726)
This commit is contained in:
parent
f70ef8be5b
commit
ce729761d6
@ -119,6 +119,7 @@ public:
|
|||||||
int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels
|
int max_kernels_per_batch; // Maximum number of kernels in a batch during compiling kernels
|
||||||
int disable_async_compilation; // Disable async compilation
|
int disable_async_compilation; // Disable async compilation
|
||||||
int disable_dynamic_impl; // Disable dynamic implementation
|
int disable_dynamic_impl; // Disable dynamic implementation
|
||||||
|
int disable_runtime_buffer_fusing; // Disable runtime buffer fusing
|
||||||
std::set<int64_t> dump_iteration; // Dump n-th execution of network.
|
std::set<int64_t> dump_iteration; // Dump n-th execution of network.
|
||||||
static const debug_configuration *get_instance();
|
static const debug_configuration *get_instance();
|
||||||
bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;
|
bool is_dumped_layer(const std::string& layerName, bool is_output = false) const;
|
||||||
|
@ -69,6 +69,11 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
|
|||||||
bool is_runtime) {
|
bool is_runtime) {
|
||||||
if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
|
if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
|
||||||
return false;
|
return false;
|
||||||
|
bool do_runtime_buffer_fusing = true;
|
||||||
|
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||||
|
GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
|
||||||
|
do_runtime_buffer_fusing = false;
|
||||||
|
}
|
||||||
auto pred_nodes = concat_node.get_dependencies();
|
auto pred_nodes = concat_node.get_dependencies();
|
||||||
for (auto p : pred_nodes) {
|
for (auto p : pred_nodes) {
|
||||||
// TODO : In dynamic shape only one user is allowed for optimized concat
|
// TODO : In dynamic shape only one user is allowed for optimized concat
|
||||||
@ -79,9 +84,9 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
|
|||||||
// for simple patterns where the concat is the only user of all the preds.
|
// for simple patterns where the concat is the only user of all the preds.
|
||||||
// Also, cascaded concat is not handled for dynamic shape for now.
|
// Also, cascaded concat is not handled for dynamic shape for now.
|
||||||
// If we have more flexible exec order handling in the future we'll be able to remove this condition below
|
// If we have more flexible exec order handling in the future we'll be able to remove this condition below
|
||||||
if (p.first->is_dynamic() && p.first->get_users().size() > 1)
|
if (p.first->is_dynamic() && (!do_runtime_buffer_fusing || p.first->get_users().size() > 1))
|
||||||
return false;
|
return false;
|
||||||
if (concat_node.is_dynamic() && !p.first->is_dynamic())
|
if (concat_node.is_dynamic() && (!do_runtime_buffer_fusing || !p.first->is_dynamic()))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// if this is called in primitive_inst::execute() and concat is static, that concat should already be optimized in build time, not in runtime.
|
// if this is called in primitive_inst::execute() and concat is static, that concat should already be optimized in build time, not in runtime.
|
||||||
|
@ -625,6 +625,10 @@ bool primitive_inst::update_impl() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void primitive_inst::do_runtime_in_place_concat() {
|
void primitive_inst::do_runtime_in_place_concat() {
|
||||||
|
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||||
|
GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (update_shape_done_by_other)
|
if (update_shape_done_by_other)
|
||||||
return;
|
return;
|
||||||
if (get_users().size() != 1) return;
|
if (get_users().size() != 1) return;
|
||||||
|
@ -133,6 +133,7 @@ static void print_help_messages() {
|
|||||||
message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels");
|
message_list.emplace_back("OV_GPU_MaxKernelsPerBatch", "Maximum number of kernels in a batch during compiling kernels");
|
||||||
message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation");
|
message_list.emplace_back("OV_GPU_DisableAsyncCompilation", "Disable async compilation");
|
||||||
message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation");
|
message_list.emplace_back("OV_GPU_DisableDynamicImpl", "Disable dynamic implementation");
|
||||||
|
message_list.emplace_back("OV_GPU_DisableRuntimeBufferFusing", "Disable runtime buffer fusing");
|
||||||
message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space.");
|
message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space.");
|
||||||
message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in"
|
message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. Expects 4 values separated by space in"
|
||||||
"the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), "
|
"the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), "
|
||||||
@ -175,7 +176,8 @@ debug_configuration::debug_configuration()
|
|||||||
, serialize_compile(0)
|
, serialize_compile(0)
|
||||||
, max_kernels_per_batch(0)
|
, max_kernels_per_batch(0)
|
||||||
, disable_async_compilation(0)
|
, disable_async_compilation(0)
|
||||||
, disable_dynamic_impl(0) {
|
, disable_dynamic_impl(0)
|
||||||
|
, disable_runtime_buffer_fusing(0) {
|
||||||
#ifdef GPU_DEBUG_CONFIG
|
#ifdef GPU_DEBUG_CONFIG
|
||||||
get_gpu_debug_env_var("Help", help);
|
get_gpu_debug_env_var("Help", help);
|
||||||
get_common_debug_env_var("Verbose", verbose);
|
get_common_debug_env_var("Verbose", verbose);
|
||||||
@ -205,6 +207,7 @@ debug_configuration::debug_configuration()
|
|||||||
get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch);
|
get_gpu_debug_env_var("MaxKernelsPerBatch", max_kernels_per_batch);
|
||||||
get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation);
|
get_gpu_debug_env_var("DisableAsyncCompilation", disable_async_compilation);
|
||||||
get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl);
|
get_gpu_debug_env_var("DisableDynamicImpl", disable_dynamic_impl);
|
||||||
|
get_gpu_debug_env_var("DisableRuntimeBufferFusing", disable_runtime_buffer_fusing);
|
||||||
std::string dump_iteration_str;
|
std::string dump_iteration_str;
|
||||||
get_gpu_debug_env_var("DumpIteration", dump_iteration_str);
|
get_gpu_debug_env_var("DumpIteration", dump_iteration_str);
|
||||||
std::string mem_preallocation_params_str;
|
std::string mem_preallocation_params_str;
|
||||||
|
Loading…
Reference in New Issue
Block a user