Revert "[GPU] Mixed precision fix for mask rcnn (#10467)" (#10515)

This reverts commit 10ac5b280b.
2022-02-18 16:38:49 +03:00
parent d9b1f10074
commit 7985c92095
3 changed files with 26 additions and 70 deletions
--- a/src/plugins/intel_gpu/src/graph/convolution.cpp
+++ b/src/plugins/intel_gpu/src/graph/convolution.cpp
@@ -165,22 +165,6 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
                      input_layout.data_padding};
    }

-    // Adjust output format for mixed precision case in onednn
-    auto out_fmt = input_layout.format;
-    if (node.get_preferred_impl_type() == impl_types::onednn) {
-        if (data_type_traits::is_i8_u8(output_type)) {
-            if (input_layout.format == format::b_fs_yx_fsv16)
-                out_fmt = format::b_fs_yx_fsv32;
-            else if (input_layout.format == format::bs_fs_yx_bsv32_fsv16)
-                out_fmt = format::bs_fs_yx_bsv32_fsv32;
-        } else if (data_type_traits::is_floating_point(output_type)) {
-            if (input_layout.format == format::b_fs_yx_fsv32)
-                out_fmt = format::b_fs_yx_fsv16;
-            else if (input_layout.format == format::bs_fs_yx_bsv32_fsv32)
-                out_fmt = format::bs_fs_yx_bsv32_fsv16;
-        }
-    }
-
    // get output feature map from weights. It should be the same as number of biases. Will be verifed in
    // convolution::create()
    auto group = desc->groups;
@@ -224,7 +208,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
            return {output_type, format::b_fs_yx_32fp, output_size};
        }

-        return {output_type, out_fmt, output_size};
+        return {output_type, input_layout.format, output_size};
    }

    auto output_range = calc_sliding_window_output_range<swor_mode::all>(input_layout.size,
@@ -247,7 +231,8 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
    if (output_type == data_types::bin) {
        return {output_type, format::b_fs_yx_32fp, output_size};
    }
-    return {output_type, out_fmt, output_size};
+
+    return {output_type, input_layout.format, output_size};
 }

 std::string convolution_inst::to_string(convolution_node const& node) {
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
@@ -16,21 +16,15 @@
 #include "permute_inst.h"
 #include "depth_to_space_inst.h"
 #include "region_yolo_inst.h"
-#include "intel_gpu/runtime/debug_configuration.hpp"

 using namespace cldnn;

-#define LOG_NODE_REMOVAL(id) GPU_DEBUG_IF(debug_config->verbose >= 2) {                                                         \
-                GPU_DEBUG_COUT << "[remove_redundant_reorders:" << __LINE__ << "] " << "Remove node: " << (id) << std::endl; }
-
-
 remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations,
    bool remove_output_reorders)
    : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations),
    remove_output_reorders(remove_output_reorders) {}

 void remove_redundant_reorders::run(program& p) {
-    GPU_DEBUG_GET_INSTANCE(debug_config);
    auto update_implementation = [&](program_node& node) {
        if (!update_implementations)
            return;
@@ -119,7 +113,6 @@ void remove_redundant_reorders::run(program& p) {
            }

            node.can_be_optimized(true);
-            LOG_NODE_REMOVAL(node.id());
            p.extract_and_remove(node);

            for (auto rl : recalc_list) {
@@ -175,7 +168,6 @@ void remove_redundant_reorders::run(program& p) {
            dep_prim->output_format = output_layout.format;
            dep_prim->output_data_type = output_layout.data_type;

-            LOG_NODE_REMOVAL(r_node.id());
            r_node.can_be_optimized(true);
            p.add_optimized_primitive_info(r_node.id());
            p.extract_and_remove(r_node);
@@ -254,8 +246,6 @@ void remove_redundant_reorders::run(program& p) {
            } else {
                p.add_optimized_primitive_info(r_node.get_primitive()->id);
            }
-
-            LOG_NODE_REMOVAL(r_node.id());
            p.extract_and_remove(
                r_node);  // try to remove if possible (with respect to r_node not being marked as output)
        }
@@ -302,8 +292,6 @@ void remove_redundant_reorders::run(program& p) {
            // pointing to, we should increment it again
            if (remove_reorder_node == *itr)
                itr++;
-
-            LOG_NODE_REMOVAL(remove_reorder_node->id());
            p.replace_all_usages(*remove_reorder_node, *node);
            p.add_optimized_primitive_info(remove_reorder_node->id());
            p.remove_all_connections(*remove_reorder_node);
@@ -348,8 +336,6 @@ void remove_redundant_reorders::run(program& p) {
            if (input.type()->does_possible_implementation_exist(input)) {
                node.can_be_optimized(true);
                p.add_optimized_primitive_info(node.id());
-
-                LOG_NODE_REMOVAL(node.id());
                p.extract_and_remove(node);
            } else {
                input.set_output_layout(old_output_layout_of_input, false);
@@ -377,8 +363,6 @@ void remove_redundant_reorders::run(program& p) {
            continue;

        dep.merge_output_padding(node.get_output_layout().data_padding);
-
-        LOG_NODE_REMOVAL(node.id());
        p.replace_all_usages(node, dep);
        p.add_optimized_primitive_info(node.id());
        p.remove_all_connections(node);
@@ -410,7 +394,6 @@ void remove_redundant_reorders::run(program& p) {
            return false;

        dep.merge_output_padding(node->get_output_layout().data_padding);
-        LOG_NODE_REMOVAL(node->id());
        p.replace_all_usages(*node, dep);
        p.get_processing_order().erase(node);
        p.add_optimized_primitive_info(node->id());
@@ -472,7 +455,6 @@ void remove_redundant_reorders::run(program& p) {
            node->set_input_layout(local_desc.input_layout);

            // remove reorder node
-            LOG_NODE_REMOVAL(node->id());
            node->can_be_optimized(true);
            p.add_optimized_primitive_info(node->id());
            p.extract_and_remove(*node);
@@ -540,14 +522,12 @@ void remove_redundant_reorders::run(program& p) {
                              reshape_node.get_fused_activations_funcs().empty() && reshape_node.get_fused_primitives().empty();

        if (remove_dep) {
-            LOG_NODE_REMOVAL(reshape_input_node.id());
            reshape_input_node.can_be_optimized(true);
            p.add_optimized_primitive_info(reshape_input_node.id());
            p.extract_and_remove(reshape_input_node);
        }

        if (remove_current) {
-            LOG_NODE_REMOVAL(reshape_node.id());
            reshape_node.can_be_optimized(true);
            p.add_optimized_primitive_info(reshape_node.id());
            p.extract_and_remove(reshape_node);
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -20,7 +20,6 @@
 #include <list>
 #include <map>
 #include <set>
-#include <tuple>

 using namespace cldnn;

@@ -563,7 +562,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
        }
    };

-    const auto reorder_convolution = [&p, &lo, &rf, &debug_config](typed_program_node<convolution>& conv_node) {
+    const auto reorder_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
        {
            // reorder weights convolution
            auto& weights = conv_node.weights();
@@ -603,43 +602,35 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
            conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
        }

-        // reorder for onednn mixed-precision conv
-        // If the layouts are like below, change input layout to fsv32.
-        // From:
-        //   (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
-        // To:
-        //   (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
-        //
-        // Do not apply such change for b=1 first conv
-        enum class __data_type {i8_u8, floating_point};
-        // Errata for mixed precision in onednn
-        // data_type, wrong_format, correct_format
-        std::vector<std::tuple<__data_type, format, format>> errata = {
-            {__data_type::i8_u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32},
-            {__data_type::i8_u8, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32},
-            {__data_type::floating_point, format::b_fs_yx_fsv32, format::b_fs_yx_fsv16},
-            {__data_type::floating_point, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv16}};
-        for (auto &e : errata) {
+        std::vector<format> wrong_format = {format::b_fs_yx_fsv16, format::bs_fs_yx_bsv32_fsv16};
+        std::vector<format> correct_format = {format::b_fs_yx_fsv32, format::bs_fs_yx_bsv32_fsv32};
+        for (int i = 0; i < wrong_format.size(); i++) {
+            // reorder for onednn mixed-precision conv
+            // If the layouts are like below, change input layout to fsv32.
+            // From:
+            //   (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
+            // To:
+            //   (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
+            //
+            // Do not apply this change to a b=1 first conv (input batch == 1 and feature <= 4)
+
            auto prev_node = conv_node.get_dependencies().front();
-            auto prev_layout = prev_node->get_output_layout();
+            auto old_layout = prev_node->get_output_layout();
            auto conv_layout = conv_node.get_output_layout();
-            auto is_target_dt_in_errata = (std::get<0>(e) == __data_type::i8_u8 && data_type_traits::is_i8_u8(prev_layout.data_type)) ||
-                                          (std::get<0>(e) == __data_type::floating_point && data_type_traits::is_floating_point(prev_layout.data_type));
-            auto wrong_format = std::get<1>(e);
-            auto correct_format = std::get<2>(e);
            if (lo.get_optimization_attributes().use_onednn_impls
-                    && is_target_dt_in_errata
-                    && conv_layout.format == wrong_format
-                    && prev_layout.format == wrong_format
-                    && !(prev_layout.size.batch[0] == 1 && prev_layout.size.feature[0] <= 4)) {
-                auto new_layout = prev_layout;
-                new_layout.format = correct_format;
+                    && conv_layout.format == wrong_format[i]
+                    && data_type_traits::is_i8_u8(old_layout.data_type)
+                    && (old_layout.format == wrong_format[i])
+                    && !(old_layout.size.batch[0] == 1 && old_layout.size.feature[0] <= 4)) {
+                auto new_layout = old_layout;
+                new_layout.format = correct_format[i];
                auto new_input = rf.get_reorder(prev_node->id(),
-                                                prev_layout,
+                                                old_layout,
                                                new_layout);

-                if (new_input.first)
+                if (new_input.first) {
                    p.add_intermediate(new_input.first, conv_node, 0, !new_input.second);
+                }

                // Prevent layout propagation as we are using mixed precision for conv
                conv_node.get_dependencies().front()->set_output_layout(new_layout, false);