diff --git a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/debug_configuration.hpp b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/debug_configuration.hpp
index 600a322491b..e64af92508f 100644
--- a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/debug_configuration.hpp
+++ b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/debug_configuration.hpp
@@ -5,6 +5,7 @@
 #pragma once
 #include
 #include
+#include <vector>
 #ifdef GPU_DEBUG_CONFIG
 #define GPU_DEBUG_IF(cond) if (cond)
@@ -32,12 +33,13 @@ public:
     std::string dump_graphs; // Dump optimized graph
     std::string dump_sources; // Dump opencl sources
     std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
-    std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
+    std::vector<std::string> dump_layers; // Dump intermediate buffers of specified layers only
     std::string dry_run_path; // Dry run and serialize execution graph into the specified path
     int dump_layers_dst_only; // Dump only output of layers
     int dump_layers_limit_batch; // Limit the size of batch to dump
     int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
     static const debug_configuration *get_instance();
+    bool is_dumped_layer(const std::string& layerName) const;
 };
 
 } // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
index f2de7e3e3ad..fda1ab60d40 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
@@ -1848,7 +1848,7 @@ std::string FusedOpsCodeGenerator::GetJitLoad(const FusedOpsConfiguration& conf,
         std::string vs = vec_size > 1 ? toCodeString(vec_size) : "";
toCodeString(vec_size) : ""; std::string block_read; - if (input_dt == Datatype::F32) { + if (input_dt == Datatype::F32 || input_dt == Datatype::INT32 || input_dt == Datatype::UINT32) { block_read = CastToType(" intel_sub_group_block_read" + vs + "(" + "(const __global uint*)(" + GetInputPtrName(input_id) + " + " + index_func_call_vec + "))", input_dt, vec_size); diff --git a/inference-engine/thirdparty/clDNN/runtime/debug_configuration.cpp b/inference-engine/thirdparty/clDNN/runtime/debug_configuration.cpp index 39053475511..f8594735194 100644 --- a/inference-engine/thirdparty/clDNN/runtime/debug_configuration.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/debug_configuration.cpp @@ -137,7 +137,6 @@ debug_configuration::debug_configuration() , dump_graphs(std::string()) , dump_sources(std::string()) , dump_layers_path(std::string()) - , dump_layers(std::string()) , dump_layers_dst_only(0) , dry_run_path(std::string()) , disable_onednn(0) @@ -151,20 +150,27 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("DumpGraphs", dump_graphs); get_gpu_debug_env_var("DumpSources", dump_sources); get_gpu_debug_env_var("DumpLayersPath", dump_layers_path); - get_gpu_debug_env_var("DumpLayers", dump_layers); get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch); get_gpu_debug_env_var("DisableOnednn", disable_onednn); get_gpu_debug_env_var("DryRunPath", dry_run_path); get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation); + std::string dump_layers_str; + get_gpu_debug_env_var("DumpLayers", dump_layers_str); if (help > 0) { print_help_messages(); exit(0); } - if (dump_layers.length() > 0) - dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used + if (dump_layers_str.length() > 0) { + dump_layers_str = " " + dump_layers_str + " "; // Insert delimiter for easier parsing when used + std::stringstream ss(dump_layers_str); + std::string layer; + while (ss >> layer) { + dump_layers.push_back(layer); + } + } #endif } @@ -180,4 +186,16 @@ const debug_configuration *debug_configuration::get_instance() { return nullptr; #endif } + +bool debug_configuration::is_dumped_layer(const std::string& layerName) const { +#ifdef GPU_DEBUG_CONFIG + if (dump_layers.empty()) return true; + auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){ + return (layerName.find(dl) != std::string::npos); + }); + return (iter != dump_layers.end()); +#else + return false; +#endif +} } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index e517c4cf1ef..6df79c97974 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -940,8 +940,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { for (size_t i = 0; i < parents.size(); i++) { can_fuse_parents[i] = (parents[i]->is_type() && conv_supports_fusings(parents[i]->as())) || - ((prim->mode == eltwise_mode::sum || prim->mode == eltwise_mode::prod) && - ((parents[i]->is_type() && bin_conv_supports_eltw_fusings(parents[i]->as())) || + (parents[i]->is_type() && bin_conv_supports_eltw_fusings(parents[i]->as())) || (parents[i]->is_type() && mvn_supports_fusings(parents[i]->as())) || 
                                   (parents[i]->is_type()) ||
                                   (parents[i]->is_type()) ||
@@ -959,7 +958,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
                                   (parents[i]->is_type()) ||
                                   (parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
                                   (parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
-                                  (parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()))));
+                                  (parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()));
         }
 
         // Disable fusion to a node on constant path when second input is in data flow
diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp
index 29210058d6b..22438b529a6 100644
--- a/inference-engine/thirdparty/clDNN/src/network.cpp
+++ b/inference-engine/thirdparty/clDNN/src/network.cpp
@@ -178,13 +178,14 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream)
 }
 
 static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
+    std::cout << "Dump " << layerName << std::endl;
     GPU_DEBUG_GET_INSTANCE(debug_config);
     std::string filename = layerName;
     std::replace(filename.begin(), filename.end(), '\\', '_');
     std::replace(filename.begin(), filename.end(), '/', '_');
     std::replace(filename.begin(), filename.end(), ' ', '_');
     std::replace(filename.begin(), filename.end(), ':', '_');
-    filename = debug_config->dump_layers_path + filename + ".txt";
+    filename = debug_config->dump_layers_path + filename + ".txt";
     std::ofstream file_stream(filename);
 
     auto mem_dt = mem->get_layout().data_type;
@@ -208,6 +209,7 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string laye
     (void)layerName;
 }
 #endif
+
 /*
 Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
 opt pass).
@@ -627,6 +629,7 @@ std::map<primitive_id, network_output> network::execute(const std::vector<event::ptr>& events) {
     OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "NetworkImpl::Execute");
     // Wait for previous execution completion
@@ -647,19 +650,16 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
     auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());
 
     set_arguments();
-
     for (auto& inst : _exec_order) {
         GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
             auto& node = _program->get_node(inst->id());
-            std::string layer_name = node.id();
+            const std::string layer_name = node.id();
             GPU_DEBUG_IF(debug_config->verbose >= 2) {
                 std::cerr << get_primitive_info(inst->id()) << std::endl;
             }
             GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
-                         (debug_config->dump_layers.length() == 0 ||
-                         (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos))) {
-                std::cout << "Dump " << layer_name << " layer src" << std::endl;
+                         debug_config->is_dumped_layer(layer_name)) {
                 for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
                     log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i),
                                        get_stream(),
                                        layer_name + "_src_" + std::to_string(i));
@@ -682,10 +682,8 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
         GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
             get_stream().finish();
             auto& node = _program->get_node(inst->id());
-            std::string layer_name = node.id();
-            GPU_DEBUG_IF(debug_config->dump_layers.length() == 0 ||
-                         (debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos)) {
-                std::cout << "Dump " << layer_name << " layer dst" << std::endl;
+            const std::string layer_name = node.id();
+            GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name)) {
                 log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
             }
         }
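
Not part of the diff: a minimal standalone sketch of the DumpLayers parsing and matching behaviour introduced above, for readers who want to try it in isolation. The free functions parse_dump_layers and is_dumped_layer below are hypothetical stand-ins for the std::vector<std::string> member and debug_configuration::is_dumped_layer() added in debug_configuration.cpp; only the logic mirrors the change.

```cpp
// Sketch only: mirrors the DumpLayers handling added in debug_configuration.cpp.
// parse_dump_layers / is_dumped_layer are illustrative helpers, not the real cldnn API.
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split the space-separated DumpLayers value into individual name patterns.
static std::vector<std::string> parse_dump_layers(const std::string& env_value) {
    std::vector<std::string> layers;
    std::stringstream ss(env_value);
    std::string layer;
    while (ss >> layer)
        layers.push_back(layer);
    return layers;
}

// An empty list means "dump everything"; otherwise a layer is dumped when any
// pattern occurs as a substring of its name, as in debug_configuration::is_dumped_layer.
static bool is_dumped_layer(const std::vector<std::string>& dump_layers, const std::string& layer_name) {
    if (dump_layers.empty())
        return true;
    return std::any_of(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl) {
        return layer_name.find(dl) != std::string::npos;
    });
}

int main() {
    auto layers = parse_dump_layers("conv1 fc");
    std::cout << is_dumped_layer(layers, "conv1_relu_fused") << "\n";  // 1: "conv1" matches by substring
    std::cout << is_dumped_layer(layers, "pool2") << "\n";             // 0: no pattern matches
    std::cout << is_dumped_layer({}, "pool2") << "\n";                 // 1: empty list dumps every layer
    return 0;
}
```

Because matching is now by substring rather than by exact token, a single entry such as conv1 selects every layer whose name contains it, while leaving DumpLayers unset keeps the previous behaviour of dumping all layers.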