[GPU] Add INT32/UINT32 to available input data types when load type is aligned in GetJitLoad (#9300)
- Modify fusibility checking to allow sub/div eltwise fusing for other primitives
- Modify dump checking code to use node name in exec graph
This commit is contained in:
parent
4eea535e78
commit
bbceae3bc3
@ -5,6 +5,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#ifdef GPU_DEBUG_CONFIG
|
#ifdef GPU_DEBUG_CONFIG
|
||||||
#define GPU_DEBUG_IF(cond) if (cond)
|
#define GPU_DEBUG_IF(cond) if (cond)
|
||||||
@ -32,12 +33,13 @@ public:
|
|||||||
std::string dump_graphs; // Dump optimized graph
|
std::string dump_graphs; // Dump optimized graph
|
||||||
std::string dump_sources; // Dump opencl sources
|
std::string dump_sources; // Dump opencl sources
|
||||||
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
|
std::string dump_layers_path; // Enable dumping intermediate buffers and set the dest path
|
||||||
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
|
std::vector<std::string> dump_layers; // Dump intermediate buffers of specified layers only
|
||||||
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
|
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
|
||||||
int dump_layers_dst_only; // Dump only output of layers
|
int dump_layers_dst_only; // Dump only output of layers
|
||||||
int dump_layers_limit_batch; // Limit the size of batch to dump
|
int dump_layers_limit_batch; // Limit the size of batch to dump
|
||||||
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
|
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
|
||||||
static const debug_configuration *get_instance();
|
static const debug_configuration *get_instance();
|
||||||
|
bool is_dumped_layer(const std::string& layerName) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace cldnn
|
} // namespace cldnn
|
||||||
|
@ -1848,7 +1848,7 @@ std::string FusedOpsCodeGenerator::GetJitLoad(const FusedOpsConfiguration& conf,
|
|||||||
std::string vs = vec_size > 1 ? toCodeString(vec_size) : "";
|
std::string vs = vec_size > 1 ? toCodeString(vec_size) : "";
|
||||||
std::string block_read;
|
std::string block_read;
|
||||||
|
|
||||||
if (input_dt == Datatype::F32) {
|
if (input_dt == Datatype::F32 || input_dt == Datatype::INT32 || input_dt == Datatype::UINT32) {
|
||||||
block_read = CastToType(" intel_sub_group_block_read" + vs + "("
|
block_read = CastToType(" intel_sub_group_block_read" + vs + "("
|
||||||
+ "(const __global uint*)(" + GetInputPtrName(input_id) + " + " + index_func_call_vec + "))",
|
+ "(const __global uint*)(" + GetInputPtrName(input_id) + " + " + index_func_call_vec + "))",
|
||||||
input_dt, vec_size);
|
input_dt, vec_size);
|
||||||
|
@ -137,7 +137,6 @@ debug_configuration::debug_configuration()
|
|||||||
, dump_graphs(std::string())
|
, dump_graphs(std::string())
|
||||||
, dump_sources(std::string())
|
, dump_sources(std::string())
|
||||||
, dump_layers_path(std::string())
|
, dump_layers_path(std::string())
|
||||||
, dump_layers(std::string())
|
|
||||||
, dump_layers_dst_only(0)
|
, dump_layers_dst_only(0)
|
||||||
, dry_run_path(std::string())
|
, dry_run_path(std::string())
|
||||||
, disable_onednn(0)
|
, disable_onednn(0)
|
||||||
@ -151,20 +150,27 @@ debug_configuration::debug_configuration()
|
|||||||
get_gpu_debug_env_var("DumpGraphs", dump_graphs);
|
get_gpu_debug_env_var("DumpGraphs", dump_graphs);
|
||||||
get_gpu_debug_env_var("DumpSources", dump_sources);
|
get_gpu_debug_env_var("DumpSources", dump_sources);
|
||||||
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
|
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
|
||||||
get_gpu_debug_env_var("DumpLayers", dump_layers);
|
|
||||||
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
|
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
|
||||||
get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
|
get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
|
||||||
get_gpu_debug_env_var("DisableOnednn", disable_onednn);
|
get_gpu_debug_env_var("DisableOnednn", disable_onednn);
|
||||||
get_gpu_debug_env_var("DryRunPath", dry_run_path);
|
get_gpu_debug_env_var("DryRunPath", dry_run_path);
|
||||||
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
|
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
|
||||||
|
std::string dump_layers_str;
|
||||||
|
get_gpu_debug_env_var("DumpLayers", dump_layers_str);
|
||||||
|
|
||||||
if (help > 0) {
|
if (help > 0) {
|
||||||
print_help_messages();
|
print_help_messages();
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dump_layers.length() > 0)
|
if (dump_layers_str.length() > 0) {
|
||||||
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
|
dump_layers_str = " " + dump_layers_str + " "; // Insert delimiter for easier parsing when used
|
||||||
|
std::stringstream ss(dump_layers_str);
|
||||||
|
std::string layer;
|
||||||
|
while (ss >> layer) {
|
||||||
|
dump_layers.push_back(layer);
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,4 +186,16 @@ const debug_configuration *debug_configuration::get_instance() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool debug_configuration::is_dumped_layer(const std::string& layerName) const {
|
||||||
|
#ifdef GPU_DEBUG_CONFIG
|
||||||
|
if (dump_layers.empty()) return true;
|
||||||
|
auto iter = std::find_if(dump_layers.begin(), dump_layers.end(), [&](const std::string& dl){
|
||||||
|
return (layerName.find(dl) != std::string::npos);
|
||||||
|
});
|
||||||
|
return (iter != dump_layers.end());
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
} // namespace cldnn
|
} // namespace cldnn
|
||||||
|
@ -940,8 +940,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
|||||||
|
|
||||||
for (size_t i = 0; i < parents.size(); i++) {
|
for (size_t i = 0; i < parents.size(); i++) {
|
||||||
can_fuse_parents[i] = (parents[i]->is_type<convolution>() && conv_supports_fusings(parents[i]->as<convolution>())) ||
|
can_fuse_parents[i] = (parents[i]->is_type<convolution>() && conv_supports_fusings(parents[i]->as<convolution>())) ||
|
||||||
((prim->mode == eltwise_mode::sum || prim->mode == eltwise_mode::prod) &&
|
(parents[i]->is_type<binary_convolution>() && bin_conv_supports_eltw_fusings(parents[i]->as<binary_convolution>())) ||
|
||||||
((parents[i]->is_type<binary_convolution>() && bin_conv_supports_eltw_fusings(parents[i]->as<binary_convolution>())) ||
|
|
||||||
(parents[i]->is_type<mvn>() && mvn_supports_fusings(parents[i]->as<mvn>())) ||
|
(parents[i]->is_type<mvn>() && mvn_supports_fusings(parents[i]->as<mvn>())) ||
|
||||||
(parents[i]->is_type<deconvolution>()) ||
|
(parents[i]->is_type<deconvolution>()) ||
|
||||||
(parents[i]->is_type<permute>()) ||
|
(parents[i]->is_type<permute>()) ||
|
||||||
@ -959,7 +958,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
|||||||
(parents[i]->is_type<scatter_elements_update>()) ||
|
(parents[i]->is_type<scatter_elements_update>()) ||
|
||||||
(parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
|
(parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
|
||||||
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
||||||
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()))));
|
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable fusion to a node on constant path when second input is in data flow
|
// Disable fusion to a node on constant path when second input is in data flow
|
||||||
|
@ -178,6 +178,7 @@ void dump<uint32_t>(memory::ptr mem, stream& stream, std::ofstream& file_stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
|
static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) {
|
||||||
|
std::cout << "Dump " << layerName << std::endl;
|
||||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||||
std::string filename = layerName;
|
std::string filename = layerName;
|
||||||
std::replace(filename.begin(), filename.end(), '\\', '_');
|
std::replace(filename.begin(), filename.end(), '\\', '_');
|
||||||
@ -208,6 +209,7 @@ static void log_memory_to_file(memory::ptr mem, stream& stream, std::string laye
|
|||||||
(void)layerName;
|
(void)layerName;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
|
Network will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants
|
||||||
opt pass).
|
opt pass).
|
||||||
@ -627,6 +629,7 @@ std::map<primitive_id, network_output> network::execute(const std::vector<event:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void network::execute_impl(const std::vector<event::ptr>& events) {
|
void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "NetworkImpl::Execute");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "NetworkImpl::Execute");
|
||||||
// Wait for previous execution completion
|
// Wait for previous execution completion
|
||||||
@ -647,19 +650,16 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
|||||||
auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());
|
auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());
|
||||||
|
|
||||||
set_arguments();
|
set_arguments();
|
||||||
|
|
||||||
for (auto& inst : _exec_order) {
|
for (auto& inst : _exec_order) {
|
||||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||||
auto& node = _program->get_node(inst->id());
|
auto& node = _program->get_node(inst->id());
|
||||||
std::string layer_name = node.id();
|
const std::string layer_name = node.id();
|
||||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||||
std::cerr << get_primitive_info(inst->id()) << std::endl;
|
std::cerr << get_primitive_info(inst->id()) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
|
GPU_DEBUG_IF(debug_config->dump_layers_dst_only == 0 &&
|
||||||
(debug_config->dump_layers.length() == 0 ||
|
debug_config->is_dumped_layer(layer_name)) {
|
||||||
(debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos))) {
|
|
||||||
std::cout << "Dump " << layer_name << " layer src" << std::endl;
|
|
||||||
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
||||||
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
|
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(),
|
||||||
layer_name + "_src_" + std::to_string(i));
|
layer_name + "_src_" + std::to_string(i));
|
||||||
@ -682,10 +682,8 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
|||||||
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0) {
|
||||||
get_stream().finish();
|
get_stream().finish();
|
||||||
auto& node = _program->get_node(inst->id());
|
auto& node = _program->get_node(inst->id());
|
||||||
std::string layer_name = node.id();
|
const std::string layer_name = node.id();
|
||||||
GPU_DEBUG_IF(debug_config->dump_layers.length() == 0 ||
|
GPU_DEBUG_IF(debug_config->is_dumped_layer(layer_name)) {
|
||||||
(debug_config->dump_layers.length() != 0 && debug_config->dump_layers.find(" " + layer_name + " ") != std::string::npos)) {
|
|
||||||
std::cout << "Dump " << layer_name << " layer dst" << std::endl;
|
|
||||||
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
|
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user