[GPU] Resolve fp16 overflow of mul (#19173)

+ Fixed black output image caused by NaN output.
+ Resolved fp16 overflow of the gemm primitive that feeds softmax.
+ Added a fused clamp activation post-op so the gemm output cannot reach inf, which was the source of the NaN output.
+ Added new pass clamp_fp16_output.
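
For context, a minimal standalone sketch (not part of this patch; it emulates the fp16 overflow in fp32, and the values are illustrative) of how an out-of-range gemm output turns the softmax result into NaN, and why clamping to the finite fp16 range avoids it:

// Illustrative only: fp16 overflows to inf above 65504; softmax then hits inf - inf = NaN.
#include <cmath>
#include <cstdio>

int main() {
    const float fp16_max = 65504.0f;   // largest finite fp16 value
    // A gemm logit larger than fp16_max is stored as +inf in an fp16 tensor
    float overflowed = INFINITY;
    // Softmax normalization exp(x - max(x)) becomes exp(inf - inf) = NaN
    std::printf("without clamp: %f\n", std::exp(overflowed - overflowed));
    // With a clamp to [-fp16_max, fp16_max] the value stays finite and softmax is well defined
    float clamped = std::fmin(std::fmax(70000.0f, -fp16_max), fp16_max);
    std::printf("with clamp:    %f\n", std::exp(clamped - clamped));
    return 0;
}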

Signed-off-by: Min, Byungil <byungil.min@intel.com>
Min, Byungil 2023-08-17 18:09:10 +09:00 committed by GitHub
parent 49bbcb4cf6
commit 1b9de79d0d
3 changed files with 53 additions and 0 deletions

@@ -0,0 +1,41 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_node.h"
#include "gemm_inst.h"
#include "reshape_inst.h"
#include "softmax_inst.h"
using namespace cldnn;
void clamp_fp16_output::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        // Add clamp activation to avoid an inf result which causes NaN output
        if (node->is_type<gemm>() && !node->is_output() && node->get_output_layout().data_type == data_types::f16) {
            auto user = node->get_users().front();
            // Reshape could be added in CreateMatMulOp : check a user node of the Reshape
            if (user->is_type<reshape>())
                user = user->get_users().front();
            if (user->is_type<softmax>()) {
                // Clamp to the finite fp16 range so the gemm output cannot overflow to inf
                float out_lo = data_type_traits::min<float>(data_types::f16);
                float out_hi = data_type_traits::max<float>(data_types::f16);
                auto activ_id = node->id() + "_overflow_clip";
                auto activ = std::make_shared<activation>(activ_id, input_info(node->id()),
                                                          activation_func::clamp, activation_additional_params{out_lo, out_hi});
                program_node& act_node = p.get_or_create(activ);
                // Attach the clamp as a fused post-op of the gemm node (no extra dependencies)
                fused_primitive_desc local_desc(activ);
                local_desc.input_layout = node->get_output_layout();
                local_desc.f_param = act_node.get_fuse_params();
                local_desc.outer_dep_start_idx = -1;  // No external dep
                local_desc.total_num_deps = 0;
                local_desc.output_layout = node->get_output_layout();
                node->add_fused_primitive(local_desc);
            }
        }
    }
}
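
Note on the approach, as read from the pass above: the clamp is not inserted as a standalone activation node in the graph; it is attached to the gemm through a fused_primitive_desc with no extra dependencies (outer_dep_start_idx = -1, total_num_deps = 0). The intent appears to be that the clamp to the finite fp16 range is applied as a post-op of the gemm itself, so the softmax user effectively consumes softmax(clamp(gemm(...))) and never sees inf.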

@@ -104,6 +104,14 @@ private:
    void run(program& p) override;
};

class clamp_fp16_output : public base_pass {
public:
    clamp_fp16_output() : base_pass("clamp_fp16_output") {}

private:
    void run(program& p) override;
};

class mark_shape_of_subgraphs : public base_pass {
    // This optimization pass aggregates nodes into shape_of subgraphs for further optimizations.
    // There are a few key requirements to decide if a node belongs to a shape_of subgraph or not:

@@ -593,6 +593,10 @@ void program::pre_optimize_graph(bool is_internal) {
    // check if there exist some layout incompatibilities and add a reorder node if required
    apply_opt_pass<add_required_reorders>();

    // Modify the fused post-ops to resolve fp16 output overflow by adding a clamp activation.
    // Currently only the 'gemm followed by softmax' pattern is clamped.
    apply_opt_pass<clamp_fp16_output>();

    // add optimization attributes for onednn primitives
    apply_opt_pass<add_onednn_optimization_attributes>();
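
Placement note (my reading of the pass order, not stated in the patch): clamp_fp16_output runs after add_required_reorders and before add_onednn_optimization_attributes, so the clamp is already recorded as a fused post-op on the gemm node by the time oneDNN optimization attributes are derived for it.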