This reverts commit 10ac5b280b.
This commit is contained in:
@@ -165,22 +165,6 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
|
||||
input_layout.data_padding};
|
||||
}
|
||||
|
||||
// Adjust output format for mixed precision case in onednn
|
||||
auto out_fmt = input_layout.format;
|
||||
if (node.get_preferred_impl_type() == impl_types::onednn) {
|
||||
if (data_type_traits::is_i8_u8(output_type)) {
|
||||
if (input_layout.format == format::b_fs_yx_fsv16)
|
||||
out_fmt = format::b_fs_yx_fsv32;
|
||||
else if (input_layout.format == format::bs_fs_yx_bsv32_fsv16)
|
||||
out_fmt = format::bs_fs_yx_bsv32_fsv32;
|
||||
} else if (data_type_traits::is_floating_point(output_type)) {
|
||||
if (input_layout.format == format::b_fs_yx_fsv32)
|
||||
out_fmt = format::b_fs_yx_fsv16;
|
||||
else if (input_layout.format == format::bs_fs_yx_bsv32_fsv32)
|
||||
out_fmt = format::bs_fs_yx_bsv32_fsv16;
|
||||
}
|
||||
}
|
||||
|
||||
// get output feature map from weights. It should be the same as the number of biases. Will be verified in
|
||||
// convolution::create()
|
||||
auto group = desc->groups;
|
||||
@@ -224,7 +208,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
|
||||
return {output_type, format::b_fs_yx_32fp, output_size};
|
||||
}
|
||||
|
||||
return {output_type, out_fmt, output_size};
|
||||
return {output_type, input_layout.format, output_size};
|
||||
}
|
||||
|
||||
auto output_range = calc_sliding_window_output_range<swor_mode::all>(input_layout.size,
|
||||
@@ -247,7 +231,8 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
|
||||
if (output_type == data_types::bin) {
|
||||
return {output_type, format::b_fs_yx_32fp, output_size};
|
||||
}
|
||||
return {output_type, out_fmt, output_size};
|
||||
|
||||
return {output_type, input_layout.format, output_size};
|
||||
}
|
||||
|
||||
std::string convolution_inst::to_string(convolution_node const& node) {
|
||||
|
||||
@@ -16,21 +16,15 @@
|
||||
#include "permute_inst.h"
|
||||
#include "depth_to_space_inst.h"
|
||||
#include "region_yolo_inst.h"
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
#define LOG_NODE_REMOVAL(id) GPU_DEBUG_IF(debug_config->verbose >= 2) { \
|
||||
GPU_DEBUG_COUT << "[remove_redundant_reorders:" << __LINE__ << "] " << "Remove node: " << (id) << std::endl; }
|
||||
|
||||
|
||||
remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations,
|
||||
bool remove_output_reorders)
|
||||
: base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations),
|
||||
remove_output_reorders(remove_output_reorders) {}
|
||||
|
||||
void remove_redundant_reorders::run(program& p) {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
auto update_implementation = [&](program_node& node) {
|
||||
if (!update_implementations)
|
||||
return;
|
||||
@@ -119,7 +113,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
}
|
||||
|
||||
node.can_be_optimized(true);
|
||||
LOG_NODE_REMOVAL(node.id());
|
||||
p.extract_and_remove(node);
|
||||
|
||||
for (auto rl : recalc_list) {
|
||||
@@ -175,7 +168,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
dep_prim->output_format = output_layout.format;
|
||||
dep_prim->output_data_type = output_layout.data_type;
|
||||
|
||||
LOG_NODE_REMOVAL(r_node.id());
|
||||
r_node.can_be_optimized(true);
|
||||
p.add_optimized_primitive_info(r_node.id());
|
||||
p.extract_and_remove(r_node);
|
||||
@@ -254,8 +246,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
} else {
|
||||
p.add_optimized_primitive_info(r_node.get_primitive()->id);
|
||||
}
|
||||
|
||||
LOG_NODE_REMOVAL(r_node.id());
|
||||
p.extract_and_remove(
|
||||
r_node); // try to remove if possible (with respect to r_node not being marked as output)
|
||||
}
|
||||
@@ -302,8 +292,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
// pointing to, we should increment it again
|
||||
if (remove_reorder_node == *itr)
|
||||
itr++;
|
||||
|
||||
LOG_NODE_REMOVAL(remove_reorder_node->id());
|
||||
p.replace_all_usages(*remove_reorder_node, *node);
|
||||
p.add_optimized_primitive_info(remove_reorder_node->id());
|
||||
p.remove_all_connections(*remove_reorder_node);
|
||||
@@ -348,8 +336,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
if (input.type()->does_possible_implementation_exist(input)) {
|
||||
node.can_be_optimized(true);
|
||||
p.add_optimized_primitive_info(node.id());
|
||||
|
||||
LOG_NODE_REMOVAL(node.id());
|
||||
p.extract_and_remove(node);
|
||||
} else {
|
||||
input.set_output_layout(old_output_layout_of_input, false);
|
||||
@@ -377,8 +363,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
continue;
|
||||
|
||||
dep.merge_output_padding(node.get_output_layout().data_padding);
|
||||
|
||||
LOG_NODE_REMOVAL(node.id());
|
||||
p.replace_all_usages(node, dep);
|
||||
p.add_optimized_primitive_info(node.id());
|
||||
p.remove_all_connections(node);
|
||||
@@ -410,7 +394,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
return false;
|
||||
|
||||
dep.merge_output_padding(node->get_output_layout().data_padding);
|
||||
LOG_NODE_REMOVAL(node->id());
|
||||
p.replace_all_usages(*node, dep);
|
||||
p.get_processing_order().erase(node);
|
||||
p.add_optimized_primitive_info(node->id());
|
||||
@@ -472,7 +455,6 @@ void remove_redundant_reorders::run(program& p) {
|
||||
node->set_input_layout(local_desc.input_layout);
|
||||
|
||||
// remove reorder node
|
||||
LOG_NODE_REMOVAL(node->id());
|
||||
node->can_be_optimized(true);
|
||||
p.add_optimized_primitive_info(node->id());
|
||||
p.extract_and_remove(*node);
|
||||
@@ -540,14 +522,12 @@ void remove_redundant_reorders::run(program& p) {
|
||||
reshape_node.get_fused_activations_funcs().empty() && reshape_node.get_fused_primitives().empty();
|
||||
|
||||
if (remove_dep) {
|
||||
LOG_NODE_REMOVAL(reshape_input_node.id());
|
||||
reshape_input_node.can_be_optimized(true);
|
||||
p.add_optimized_primitive_info(reshape_input_node.id());
|
||||
p.extract_and_remove(reshape_input_node);
|
||||
}
|
||||
|
||||
if (remove_current) {
|
||||
LOG_NODE_REMOVAL(reshape_node.id());
|
||||
reshape_node.can_be_optimized(true);
|
||||
p.add_optimized_primitive_info(reshape_node.id());
|
||||
p.extract_and_remove(reshape_node);
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
@@ -563,7 +562,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
|
||||
}
|
||||
};
|
||||
|
||||
const auto reorder_convolution = [&p, &lo, &rf, &debug_config](typed_program_node<convolution>& conv_node) {
|
||||
const auto reorder_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
|
||||
{
|
||||
// reorder weights convolution
|
||||
auto& weights = conv_node.weights();
|
||||
@@ -603,43 +602,35 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
|
||||
conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
|
||||
}
|
||||
|
||||
// reorder for onednn mixed-precision conv
|
||||
// If the layouts are like below, change input layout to fsv32.
|
||||
// From:
|
||||
// (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
// To:
|
||||
// (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
//
|
||||
// Do not apply such change for b=1 first conv
|
||||
enum class __data_type {i8_u8, floating_point};
|
||||
// Errata for mixed precision in onednn
|
||||
// data_type, wrong_format, correct_format
|
||||
std::vector<std::tuple<__data_type, format, format>> errata = {
|
||||
{__data_type::i8_u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32},
|
||||
{__data_type::i8_u8, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32},
|
||||
{__data_type::floating_point, format::b_fs_yx_fsv32, format::b_fs_yx_fsv16},
|
||||
{__data_type::floating_point, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv16}};
|
||||
for (auto &e : errata) {
|
||||
std::vector<format> wrong_format = {format::b_fs_yx_fsv16, format::bs_fs_yx_bsv32_fsv16};
|
||||
std::vector<format> correct_format = {format::b_fs_yx_fsv32, format::bs_fs_yx_bsv32_fsv32};
|
||||
for (int i = 0; i < wrong_format.size(); i++) {
|
||||
// reorder for onednn mixed-precision conv
|
||||
// If the layouts are like below, change input layout to fsv32.
|
||||
// From:
|
||||
// (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
// To:
|
||||
// (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
//
|
||||
// Do not apply such change for b=1 first conv
|
||||
|
||||
auto prev_node = conv_node.get_dependencies().front();
|
||||
auto prev_layout = prev_node->get_output_layout();
|
||||
auto old_layout = prev_node->get_output_layout();
|
||||
auto conv_layout = conv_node.get_output_layout();
|
||||
auto is_target_dt_in_errata = (std::get<0>(e) == __data_type::i8_u8 && data_type_traits::is_i8_u8(prev_layout.data_type)) ||
|
||||
(std::get<0>(e) == __data_type::floating_point && data_type_traits::is_floating_point(prev_layout.data_type));
|
||||
auto wrong_format = std::get<1>(e);
|
||||
auto correct_format = std::get<2>(e);
|
||||
if (lo.get_optimization_attributes().use_onednn_impls
|
||||
&& is_target_dt_in_errata
|
||||
&& conv_layout.format == wrong_format
|
||||
&& prev_layout.format == wrong_format
|
||||
&& !(prev_layout.size.batch[0] == 1 && prev_layout.size.feature[0] <= 4)) {
|
||||
auto new_layout = prev_layout;
|
||||
new_layout.format = correct_format;
|
||||
&& conv_layout.format == wrong_format[i]
|
||||
&& data_type_traits::is_i8_u8(old_layout.data_type)
|
||||
&& (old_layout.format == wrong_format[i])
|
||||
&& !(old_layout.size.batch[0] == 1 && old_layout.size.feature[0] <= 4)) {
|
||||
auto new_layout = old_layout;
|
||||
new_layout.format = correct_format[i];
|
||||
auto new_input = rf.get_reorder(prev_node->id(),
|
||||
prev_layout,
|
||||
old_layout,
|
||||
new_layout);
|
||||
|
||||
if (new_input.first)
|
||||
if (new_input.first) {
|
||||
p.add_intermediate(new_input.first, conv_node, 0, !new_input.second);
|
||||
}
|
||||
|
||||
// Prevent layout propagation as we are using mixed precision for conv
|
||||
conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
|
||||
|
||||
Reference in New Issue
Block a user