[GPU] Resolve fp16 overflow of mul (#19173)
+ Fixed black output image caused by NaN output
+ Resolved fp16 overflow of the gemm primitive before softmax
+ Added a fused clamp-activation post-op so the result cannot reach inf, which caused the NaN output
+ Added new pass modify_fused_ops

Signed-off-by: Min, Byungil <byungil.min@intel.com>
parent 49bbcb4cf6
commit 1b9de79d0d
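The failure chain behind the black image: fp16 cannot represent values above 65504, so a large gemm accumulation feeding softmax rounds to +inf; a softmax implemented in the usual numerically stable way then computes x - max(x), and inf - inf is NaN, which poisons the whole row (even without max-subtraction, exp(inf)/sum gives inf/inf = NaN). A minimal standalone sketch of this, illustrative only: it uses fp32 with an injected inf to stand in for the overflowed fp16 lane, and is not part of the commit.

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
        // Stand-in for a gemm output row where one lane overflowed fp16 to +inf.
        float x[3] = {1.0f, 2.0f, std::numeric_limits<float>::infinity()};
        float mx = x[2];           // row max used by a stable softmax: +inf
        float denom = 0.0f;
        for (float v : x)
            denom += std::exp(v - mx);   // exp(inf - inf) = exp(NaN) = NaN, so the sum becomes NaN
        for (float v : x)
            std::printf("%f\n", std::exp(v - mx) / denom);  // prints nan for every lane
        return 0;
    }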
@@ -0,0 +1,41 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "pass_manager.h"
#include "program_node.h"

#include "gemm_inst.h"
#include "reshape_inst.h"
#include "softmax_inst.h"

using namespace cldnn;

void clamp_fp16_output::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        // Add a clamp activation to avoid an inf result, which causes NaN output
        if (node->is_type<gemm>() && !node->is_output() && node->get_output_layout().data_type == data_types::f16) {
            auto user = node->get_users().front();
            // A Reshape could have been added in CreateMatMulOp: check the user node of the Reshape
            if (user->is_type<reshape>())
                user = user->get_users().front();

            if (user->is_type<softmax>()) {
                float out_lo = data_type_traits::min<float>(data_types::f16);
                float out_hi = data_type_traits::max<float>(data_types::f16);
                auto activ_id = node->id() + "_overflow_clip";
                auto activ = std::make_shared<activation>(activ_id, input_info(node->id()),
                                                          activation_func::clamp, activation_additional_params{out_lo, out_hi});
                program_node& act_node = p.get_or_create(activ);

                fused_primitive_desc local_desc(activ);
                local_desc.input_layout = node->get_output_layout();
                local_desc.f_param = act_node.get_fuse_params();
                local_desc.outer_dep_start_idx = -1;  // No external dependency
                local_desc.total_num_deps = 0;
                local_desc.output_layout = node->get_output_layout();
                node->add_fused_primitive(local_desc);
            }
        }
    }
}
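With the pass applied, the gemm node carries a fused clamp that saturates its fp16 output instead of letting it overflow. A sketch of the assumed per-element semantics follows; the helper name is hypothetical, the actual work happens inside the fused GPU kernel, and the bounds are assumed to be the finite fp16 range ±65504 that data_type_traits::min/max should yield for data_types::f16.

    // Assumed per-element behavior of the fused clamp post-op (illustrative only).
    static inline float clamp_fp16_output_value(float v) {
        const float fp16_lowest = -65504.0f;  // lowest finite fp16 value (assumed bound)
        const float fp16_max    =  65504.0f;  // largest finite fp16 value (assumed bound)
        return v < fp16_lowest ? fp16_lowest : (v > fp16_max ? fp16_max : v);
    }

Expressing the clamp via a fused_primitive_desc rather than inserting a standalone activation node keeps the operation inside the gemm kernel, which presumably avoids an extra kernel launch and an intermediate fp16 buffer.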
@@ -104,6 +104,14 @@ private:
    void run(program& p) override;
};

class clamp_fp16_output : public base_pass {
public:
    clamp_fp16_output() : base_pass("clamp_fp16_output") {}

private:
    void run(program& p) override;
};

class mark_shape_of_subgraphs : public base_pass {
    // This optimization pass aggregates nodes into shape_of subgraphs for further optimizations.
    // There are a few key requirements to decide if a node belongs to a shape_of subgraph or not:
@@ -593,6 +593,10 @@ void program::pre_optimize_graph(bool is_internal) {
    // check if there exist some layout incompatibilities and add a reorder node if required
    apply_opt_pass<add_required_reorders>();

    // Modify the fused post-ops to resolve fp16 output overflow by adding a clamp activation
    // Currently, clamping is applied only to the 'gemm-softmax' case
    apply_opt_pass<clamp_fp16_output>();

    // add optimization attributes for onednn primitives
    apply_opt_pass<add_onednn_optimization_attributes>();
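A reading of the registration point (an inference from the diff, not stated in the commit message): the new pass runs after add_required_reorders and before add_onednn_optimization_attributes, presumably so the clamp is already fused onto the gemm node by the time oneDNN optimization attributes are generated.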