Revert "[GPU] Mixed precision fix for mask rcnn (#10467)" (#10515)

This reverts commit 10ac5b280b.

Author:    Alina Kladieva
Date:      2022-02-18 16:38:49 +03:00
Committer: GitHub
Parent:    d9b1f10074
Commit:    7985c92095

3 changed files with 26 additions and 70 deletions

convolution.cpp

@@ -165,22 +165,6 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
                 input_layout.data_padding};
     }
 
-    // Adjust output format for mixed precision case in onednn
-    auto out_fmt = input_layout.format;
-    if (node.get_preferred_impl_type() == impl_types::onednn) {
-        if (data_type_traits::is_i8_u8(output_type)) {
-            if (input_layout.format == format::b_fs_yx_fsv16)
-                out_fmt = format::b_fs_yx_fsv32;
-            else if (input_layout.format == format::bs_fs_yx_bsv32_fsv16)
-                out_fmt = format::bs_fs_yx_bsv32_fsv32;
-        } else if (data_type_traits::is_floating_point(output_type)) {
-            if (input_layout.format == format::b_fs_yx_fsv32)
-                out_fmt = format::b_fs_yx_fsv16;
-            else if (input_layout.format == format::bs_fs_yx_bsv32_fsv32)
-                out_fmt = format::bs_fs_yx_bsv32_fsv16;
-        }
-    }
-
     // get output feature map from weights. It should be the same as number of biases. Will be verifed in
     // convolution::create()
     auto group = desc->groups;
@@ -224,7 +208,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
             return {output_type, format::b_fs_yx_32fp, output_size};
         }
-        return {output_type, out_fmt, output_size};
+        return {output_type, input_layout.format, output_size};
     }
 
     auto output_range = calc_sliding_window_output_range<swor_mode::all>(input_layout.size,
@@ -247,7 +231,8 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
     if (output_type == data_types::bin) {
         return {output_type, format::b_fs_yx_32fp, output_size};
     }
-    return {output_type, out_fmt, output_size};
+    return {output_type, input_layout.format, output_size};
 }
 
 std::string convolution_inst::to_string(convolution_node const& node) {
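
For reference, the logic deleted from calc_output_layout can be read in isolation. Below is a minimal, self-contained C++ sketch (simplified stand-in enums, not the actual cldnn types) of the rule the revert removes: when a onednn convolution's output precision disagrees with its input's blocked layout, the output format swaps to the feature-block width matching the output type (fsv32 for int8/uint8 outputs, fsv16 for floating-point ones).

// Standalone sketch of the reverted format-adjustment rule.
// Format and DataType are hypothetical simplified types, not the cldnn API.
#include <cstdio>

enum class Format { b_fs_yx_fsv16, b_fs_yx_fsv32, bs_fs_yx_bsv32_fsv16, bs_fs_yx_bsv32_fsv32 };
enum class DataType { i8, u8, f16, f32 };

static bool is_i8_u8(DataType dt) { return dt == DataType::i8 || dt == DataType::u8; }
static bool is_float(DataType dt) { return dt == DataType::f16 || dt == DataType::f32; }

// Mirrors the removed calc_output_layout logic: start from the input format
// and swap the feature-block width when the output precision disagrees.
Format adjust_output_format(Format in_fmt, DataType out_dt) {
    Format out_fmt = in_fmt;
    if (is_i8_u8(out_dt)) {
        if (in_fmt == Format::b_fs_yx_fsv16)
            out_fmt = Format::b_fs_yx_fsv32;
        else if (in_fmt == Format::bs_fs_yx_bsv32_fsv16)
            out_fmt = Format::bs_fs_yx_bsv32_fsv32;
    } else if (is_float(out_dt)) {
        if (in_fmt == Format::b_fs_yx_fsv32)
            out_fmt = Format::b_fs_yx_fsv16;
        else if (in_fmt == Format::bs_fs_yx_bsv32_fsv32)
            out_fmt = Format::bs_fs_yx_bsv32_fsv16;
    }
    return out_fmt;
}

int main() {
    // An fp16 convolution fed by an int8-blocked tensor falls back to fsv16 output.
    std::printf("%d\n", adjust_output_format(Format::b_fs_yx_fsv32, DataType::f16) == Format::b_fs_yx_fsv16);
}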

remove_redundant_reorders.cpp

@@ -16,21 +16,15 @@
 #include "permute_inst.h"
 #include "depth_to_space_inst.h"
 #include "region_yolo_inst.h"
-#include "intel_gpu/runtime/debug_configuration.hpp"
 
 using namespace cldnn;
 
-#define LOG_NODE_REMOVAL(id) GPU_DEBUG_IF(debug_config->verbose >= 2) { \
-    GPU_DEBUG_COUT << "[remove_redundant_reorders:" << __LINE__ << "] " << "Remove node: " << (id) << std::endl; }
-
 remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations,
                                                      bool remove_output_reorders)
     : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations),
       remove_output_reorders(remove_output_reorders) {}
 
 void remove_redundant_reorders::run(program& p) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
     auto update_implementation = [&](program_node& node) {
         if (!update_implementations)
             return;
@@ -119,7 +113,6 @@ void remove_redundant_reorders::run(program& p) {
         }
 
         node.can_be_optimized(true);
-        LOG_NODE_REMOVAL(node.id());
         p.extract_and_remove(node);
 
         for (auto rl : recalc_list) {
@@ -175,7 +168,6 @@ void remove_redundant_reorders::run(program& p) {
         dep_prim->output_format = output_layout.format;
         dep_prim->output_data_type = output_layout.data_type;
 
-        LOG_NODE_REMOVAL(r_node.id());
         r_node.can_be_optimized(true);
         p.add_optimized_primitive_info(r_node.id());
         p.extract_and_remove(r_node);
@@ -254,8 +246,6 @@ void remove_redundant_reorders::run(program& p) {
         } else {
             p.add_optimized_primitive_info(r_node.get_primitive()->id);
         }
-
-        LOG_NODE_REMOVAL(r_node.id());
         p.extract_and_remove(
             r_node);  // try to remove if possible (with respect to r_node not being marked as output)
     }
@@ -302,8 +292,6 @@ void remove_redundant_reorders::run(program& p) {
         // pointing to, we should increment it again
         if (remove_reorder_node == *itr)
             itr++;
-
-        LOG_NODE_REMOVAL(remove_reorder_node->id());
         p.replace_all_usages(*remove_reorder_node, *node);
         p.add_optimized_primitive_info(remove_reorder_node->id());
         p.remove_all_connections(*remove_reorder_node);
@@ -348,8 +336,6 @@ void remove_redundant_reorders::run(program& p) {
         if (input.type()->does_possible_implementation_exist(input)) {
             node.can_be_optimized(true);
             p.add_optimized_primitive_info(node.id());
-
-            LOG_NODE_REMOVAL(node.id());
             p.extract_and_remove(node);
         } else {
             input.set_output_layout(old_output_layout_of_input, false);
@@ -377,8 +363,6 @@ void remove_redundant_reorders::run(program& p) {
             continue;
 
         dep.merge_output_padding(node.get_output_layout().data_padding);
-
-        LOG_NODE_REMOVAL(node.id());
         p.replace_all_usages(node, dep);
         p.add_optimized_primitive_info(node.id());
         p.remove_all_connections(node);
@@ -410,7 +394,6 @@ void remove_redundant_reorders::run(program& p) {
             return false;
 
         dep.merge_output_padding(node->get_output_layout().data_padding);
-        LOG_NODE_REMOVAL(node->id());
         p.replace_all_usages(*node, dep);
         p.get_processing_order().erase(node);
         p.add_optimized_primitive_info(node->id());
@@ -472,7 +455,6 @@ void remove_redundant_reorders::run(program& p) {
         node->set_input_layout(local_desc.input_layout);
 
         // remove reorder node
-        LOG_NODE_REMOVAL(node->id());
         node->can_be_optimized(true);
         p.add_optimized_primitive_info(node->id());
         p.extract_and_remove(*node);
@@ -540,14 +522,12 @@ void remove_redundant_reorders::run(program& p) {
             reshape_node.get_fused_activations_funcs().empty() && reshape_node.get_fused_primitives().empty();
 
         if (remove_dep) {
-            LOG_NODE_REMOVAL(reshape_input_node.id());
             reshape_input_node.can_be_optimized(true);
             p.add_optimized_primitive_info(reshape_input_node.id());
             p.extract_and_remove(reshape_input_node);
         }
 
         if (remove_current) {
-            LOG_NODE_REMOVAL(reshape_node.id());
             reshape_node.can_be_optimized(true);
             p.add_optimized_primitive_info(reshape_node.id());
             p.extract_and_remove(reshape_node);
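
Every hunk in this file deletes a call to the LOG_NODE_REMOVAL macro that the reverted commit had introduced. The sketch below uses hypothetical names (GPU_DEBUG_GET_INSTANCE and GPU_DEBUG_COUT are replaced with a plain global) to show the pattern the macro implemented: a node-removal trace gated on a verbosity level and tagged with the source line, so each removal site in the pass is distinguishable in the log.

// Self-contained sketch of a verbosity-gated removal-logging macro.
// Names are illustrative, not the intel_gpu debug API.
#include <iostream>
#include <string>

struct DebugConfig { int verbose = 0; };
static DebugConfig g_debug_config{2};  // stand-in for a config parsed from the environment

// __LINE__ expands at the call site, so every removal site logs its own line number.
#define LOG_NODE_REMOVAL_SKETCH(id)                                          \
    do {                                                                     \
        if (g_debug_config.verbose >= 2)                                     \
            std::cout << "[remove_redundant_reorders:" << __LINE__ << "] "   \
                      << "Remove node: " << (id) << std::endl;               \
    } while (0)

int main() {
    std::string node_id = "reorder_17";
    LOG_NODE_REMOVAL_SKETCH(node_id);  // prints, because verbose >= 2
    g_debug_config.verbose = 0;
    LOG_NODE_REMOVAL_SKETCH(node_id);  // silent in a non-verbose run
}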

reorder_inputs.cpp

@@ -20,7 +20,6 @@
 #include <list>
 #include <map>
 #include <set>
-#include <tuple>
 
 using namespace cldnn;
@@ -563,7 +562,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
         }
     };
 
-    const auto reorder_convolution = [&p, &lo, &rf, &debug_config](typed_program_node<convolution>& conv_node) {
+    const auto reorder_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
     {
         // reorder weights convolution
         auto& weights = conv_node.weights();
@@ -603,43 +602,35 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
             conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
         }
 
-        // reorder for onednn mixed-precision conv
-        // If the layouts are like below, change input layout to fsv32.
-        // From:
-        //   (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
-        // To:
-        //   (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
-        //
-        // Do not apply such change for b=1 first conv
-        enum class __data_type {i8_u8, floating_point};
-        // Errata for mixed precision in onednn
-        // data_type, wrong_format, correct_format
-        std::vector<std::tuple<__data_type, format, format>> errata = {
-            {__data_type::i8_u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32},
-            {__data_type::i8_u8, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32},
-            {__data_type::floating_point, format::b_fs_yx_fsv32, format::b_fs_yx_fsv16},
-            {__data_type::floating_point, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv16}};
-        for (auto &e : errata) {
+        std::vector<format> wrong_format = {format::b_fs_yx_fsv16, format::bs_fs_yx_bsv32_fsv16};
+        std::vector<format> correct_format = {format::b_fs_yx_fsv32, format::bs_fs_yx_bsv32_fsv32};
+        for (int i = 0; i < wrong_format.size(); i++) {
+            // reorder for onednn mixed-precision conv
+            // If the layouts are like below, change input layout to fsv32.
+            // From:
+            //   (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
+            // To:
+            //   (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
+            //
+            // Do not apply such change for b=1 first conv
             auto prev_node = conv_node.get_dependencies().front();
-            auto prev_layout = prev_node->get_output_layout();
+            auto old_layout = prev_node->get_output_layout();
             auto conv_layout = conv_node.get_output_layout();
-            auto is_target_dt_in_errata = (std::get<0>(e) == __data_type::i8_u8 && data_type_traits::is_i8_u8(prev_layout.data_type)) ||
-                                          (std::get<0>(e) == __data_type::floating_point && data_type_traits::is_floating_point(prev_layout.data_type));
-            auto wrong_format = std::get<1>(e);
-            auto correct_format = std::get<2>(e);
             if (lo.get_optimization_attributes().use_onednn_impls
-                && is_target_dt_in_errata
-                && conv_layout.format == wrong_format
-                && prev_layout.format == wrong_format
-                && !(prev_layout.size.batch[0] == 1 && prev_layout.size.feature[0] <= 4)) {
-                auto new_layout = prev_layout;
-                new_layout.format = correct_format;
+                && conv_layout.format == wrong_format[i]
+                && data_type_traits::is_i8_u8(old_layout.data_type)
+                && (old_layout.format == wrong_format[i])
+                && !(old_layout.size.batch[0] == 1 && old_layout.size.feature[0] <= 4)) {
+                auto new_layout = old_layout;
+                new_layout.format = correct_format[i];
                 auto new_input = rf.get_reorder(prev_node->id(),
-                                                prev_layout,
+                                                old_layout,
                                                 new_layout);
-                if (new_input.first)
+                if (new_input.first) {
                     p.add_intermediate(new_input.first, conv_node, 0, !new_input.second);
+                }
                 // Prevent layout propagation as we are using mixed precision for conv
                 conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
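
The last hunk swaps a data-driven errata table back to two parallel format vectors, and narrows the fix-up from both int8/uint8 and floating-point mixed precision back to int8/uint8 only. As a reading aid, here is a compact sketch (simplified enums, not the cldnn pass) of the tuple-table form being removed; the restored code instead iterates wrong_format/correct_format by index and checks only is_i8_u8.

// Sketch of the reverted errata-table lookup: find the entry whose data-type
// class and "wrong" format match the producer layout, then adopt the "correct"
// format (the real pass would insert a reorder to that layout).
#include <cstdio>
#include <tuple>
#include <vector>

enum class Fmt { fsv16, fsv32, bsv32_fsv16, bsv32_fsv32 };
enum class DtClass { i8_u8, floating_point };

int main() {
    // data-type class, wrong format, correct format
    const std::vector<std::tuple<DtClass, Fmt, Fmt>> errata = {
        {DtClass::i8_u8, Fmt::fsv16, Fmt::fsv32},
        {DtClass::i8_u8, Fmt::bsv32_fsv16, Fmt::bsv32_fsv32},
        {DtClass::floating_point, Fmt::fsv32, Fmt::fsv16},
        {DtClass::floating_point, Fmt::bsv32_fsv32, Fmt::bsv32_fsv16},
    };

    Fmt input_fmt = Fmt::bsv32_fsv16;       // e.g. a u8 tensor blocked as bsv32_fsv16
    DtClass input_dt = DtClass::i8_u8;

    for (const auto& [dt, wrong, correct] : errata) {
        if (dt == input_dt && input_fmt == wrong) {
            input_fmt = correct;            // reorder target for the mixed-precision conv
            break;
        }
    }
    std::printf("fixed up: %d\n", input_fmt == Fmt::bsv32_fsv32);
}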