// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cldnn/graph/program.hpp"
#include "layout_optimizer.h"
#include "split_inst.h"
#include "lstm_inst.h"
#include "lstm_dynamic_inst.h"
#include "quantize_inst.h"
#include "eltwise_inst.h"
#include "convolution_inst.h"
#include <string>
#include <vector>
#include <memory>
#include <list>
#include <utility>
#include <set>
#include <functional>
#include <fstream>

namespace cldnn {

class base_pass {
    friend class pass_manager;

public:
    explicit base_pass(const std::string& pass_name) : name(pass_name) {}
    virtual void run(program& p) = 0;
    std::string get_name() { return name; }
    void clean_marks(program& p) {
        for (auto& node : p.get_processing_order()) {
            node->unmark();
        }
    }

private:
    const std::string name;
};

class pass_manager {
public:
    explicit pass_manager(program& p);
    void run(program& p, base_pass& pass);
    uint32_t get_pass_count() { return pass_count; }
    uint32_t inc_pass_count() { return ++pass_count; }
    ~pass_manager() {}

private:
    uint32_t pass_count;
    std::ofstream graph_opt_log;
};
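
// Illustrative usage (added commentary, not part of the original header):
// every optimization pass derives from base_pass and is executed through
// pass_manager::run; the manager also tracks a running pass count
// (get_pass_count / inc_pass_count) and holds a stream for the graph
// optimization log. A minimal sketch, assuming a cldnn::program `prog`
// built elsewhere:
//
//     pass_manager pm(prog);
//     mark_nodes mark;
//     pm.run(prog, mark);        // executes mark.run(prog)
//     compile_graph compile;
//     pm.run(prog, compile);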

class add_required_reorders : public base_pass {
public:
    add_required_reorders() : base_pass("add_required_reorders") {}

private:
    void run(program& p) override;
    void add_reorder(program& p, program_node* node, program_node* usr);
};

class add_reshape_to_primitives : public base_pass {
public:
    add_reshape_to_primitives() : base_pass("add_reshape_to_primitives_pass") {}

private:
    void run(program& p) override;
};

class calculate_prior_boxes : public base_pass {
public:
    calculate_prior_boxes() : base_pass("calculated_prior_boxes") {}

private:
    void run(program& p) override;
};

class compile_graph : public base_pass {
public:
    compile_graph() : base_pass("compile_graph") {}

private:
    void run(program& p) override;
};

class eltwise_shrinking : public base_pass {
public:
    eltwise_shrinking() : base_pass("eltwise_shrinking") {}

private:
    void run(program& p) override;
};

class eltwise_remove_stride : public base_pass {
public:
    eltwise_remove_stride() : base_pass("eltwise_remove_stride") {}

private:
    void run(program& p) override;
    void conv_stride_extend(program& p, program_node& node, cldnn::tensor& tensor);
};

class graph_initializations : public base_pass {
public:
    graph_initializations() : base_pass("init") {}

private:
    void run(program& p) override;
    void handle_split_node(program& p, split_node& node);
    void handle_lstm_node(program& p, lstm_node& node);
    void handle_dynamic_lstm_node(program& p, lstm_dynamic_node& node);
    void set_outputs(program& p);
};

class handle_reshape : public base_pass {
public:
    handle_reshape() : base_pass("handle_reshape") {}

private:
    void run(program& p) override;
};

class handle_input_padding : public base_pass {
public:
    handle_input_padding() : base_pass("handle_input_padding") {}

private:
    void run(program& p) override;
};

class mark_nodes : public base_pass {
public:
    mark_nodes() : base_pass("analyzed_graph") {}

private:
    void run(program& p) override;
};

class prepare_buffer_fusing : public base_pass {
public:
    prepare_buffer_fusing() : base_pass("prepare_buffer_fusing") {}

private:
    void run(program& p) override;
};

class prepare_quantization : public base_pass {
public:
    prepare_quantization() : base_pass("prepare_quantization") {}

private:
    void run(program& p) override;
    void handle_quantize_node(program& p, quantize_node& quantize_node);
    void prepare_packed_quantize(program& p, quantize_node& quantize_node);
    void prepare_dequantize_merge(program& p, eltwise_node& eltwise_node);
    void remove_fake_reorders(program& p, reorder_node& reorder_node);
    void prepare_asymmetric_quantization(program& p, convolution_node& convolution_node);
    void prepare_scale_shift_opt(program& p, quantize_node& quantize_node);
};

class prepare_conv_eltw_fusing : public base_pass {
public:
    explicit prepare_conv_eltw_fusing(layout_optimizer& lo_ref, bool b_fs_yx_fsv16_opt = false) :
        base_pass("prepare_conv_eltw_fusing"), _lo(lo_ref), b_fs_yx_fsv16_opt(b_fs_yx_fsv16_opt) {}

private:
    void run(program& p) override;
    void fuse_conv_eltwise(program& p, program_node* node);
    void fuse_conv_depth_to_space(program& p, program_node* node);
    layout_optimizer& _lo;
    bool b_fs_yx_fsv16_opt;
};

class prepare_conv_eltw_read_write_opt : public base_pass {
public:
    prepare_conv_eltw_read_write_opt() : base_pass("prepare_conv_eltw_read_write_opt") {}

private:
    void run(program& p) override;
    void conv_eltwise_read_write_opt(program& p, program_node* node);
};

class prepare_primitive_fusing : public base_pass {
public:
    explicit prepare_primitive_fusing(layout_optimizer& lo_ref) :
        base_pass("prepare_primitive_fusing"), _lo(lo_ref) {}

private:
    void run(program& p) override;
    void fuse_sigmoid_mul_to_swish(program& p);
    void fuse_bias(program& p);
    void fuse_reorders(program& p);
    void fuse_activations(program& p);
    void fuse_simple_primitives(program& p);
    void optimize_fused_ops(program& p);
    void remove_redundant_reshape(program& p);
    layout_optimizer& _lo;
};

class pre_replace_deconv : public base_pass {
public:
    explicit pre_replace_deconv(layout_optimizer& lo_ref) :
        base_pass("pre_replace_deconv"), _lo(lo_ref) {}

private:
    void run(program& p) override;
    layout_optimizer& _lo;
};

class pre_optimize_bias : public base_pass {
public:
    explicit pre_optimize_bias(reorder_factory& rf_ref);

private:
    void run(program& p) override;
    virtual void run(program& p, reorder_factory& rf);
    template <typename T>
    void optimize_bias(T& node, reorder_factory& rf, program& p);
    reorder_factory& _rf;
};

class prepare_padding : public base_pass {
public:
    explicit prepare_padding(bool output_size_handling_enabled_switch)
        : base_pass("prepare_padding"), output_size_handling_enabled(output_size_handling_enabled_switch) {}

private:
    void run(program& p) override;
    bool output_size_handling_enabled;
};

class post_input_reorder : public base_pass {
public:
    post_input_reorder() : base_pass("post_input_reorder") {}

private:
    void run(program& p) override;
    program_node& add_reorder(program& p, program_node* node, program_node* usr, const layout& reorder_layout);
};

class post_optimize_weights : public base_pass {
public:
    explicit post_optimize_weights(reorder_factory& rf_ref);

private:
    struct weights_bias_offset {
        size_t weights_offset;
        size_t bias_offset;
        // When using this ctor, the weights offset is added to the bias offset.
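        // Added example (not in the original): weights_bias_offset(2, 1)
        // stores weights_offset == 2 and bias_offset == 2 + 1 == 3; the
        // stored bias offset is shifted by the weights offset.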
        weights_bias_offset(const size_t w_offset, const size_t b_offset)
            : weights_offset(w_offset)
            , bias_offset(weights_offset + b_offset)
        {}
    };

    void run(program& p) override;
    template<typename T>
    weights_bias_offset get_weights_bias_offset(const T& node);
    template<typename T>
    void optimize_weights(T& node, program& p);
    reorder_factory& _rf;
};

class propagate_constants : public base_pass {
public:
    propagate_constants() : base_pass("propagate_constants") {}

private:
    void run(program& p) override;
    std::list<std::pair<primitive_id, memory::ptr>> calculate(engine& engine, build_options bo);
    bool has_non_const_user(program_node& node) const;
    void handle_constant(program& prog, program_node& node);
    void add_constant(program& prog, program_node& node);
    void add_deps_to_tpl(program& prog, const std::vector<program_node*>& node);
    bool has_non_trivial_constants = false;
    std::list<typed_program_node<data>*> const_inputs;
    std::vector<primitive_id> const_outputs;
    std::set<std::shared_ptr<program_node>> nodes;
};

class remove_redundant_reorders : public base_pass {
public:
    explicit remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing = false, bool update_implementations = false,
                                       bool remove_output_reorders = false);
    void run(program& p) override;

private:
    layout_optimizer& lo;
    bool enable_reorder_fusing;
    bool update_implementations;
    bool remove_output_reorders;
};

class reorder_inputs : public base_pass {
public:
    reorder_inputs(layout_optimizer& lo_ref, reorder_factory& rf_ref);

private:
    void run(program& p) override;
    virtual void run(program& p, layout_optimizer& lo, reorder_factory& rf);
    layout_optimizer& _lo;
    reorder_factory& _rf;
};

class trim_to_outputs : public base_pass {
public:
    trim_to_outputs() : base_pass("trimmed") {}

private:
    void run(program& p) override;
};

class strided_slice_optimize : public base_pass {
public:
    strided_slice_optimize() : base_pass("strided_slice_optimize") {}
    void run(program& p) override;
};

class reverse_optional_nodes_outputs : public base_pass {
public:
    reverse_optional_nodes_outputs() : base_pass("reverse_optional_nodes_outputs") {}
    void run(program& p) override;
};

class concat_input_order : public base_pass {
    // This optimization changes the order of inputs to a concatenation to
    // provide better alignment for execution and to allow the concatenation
    // to be optimized out in some cases.
    // For example, a concatenation along features with inputs [13, 1024] in
    // format fsv16 has only its first input aligned to feature blocks, which
    // blocks a performant implementation for the second one.
    // This can be fixed by changing the order to [1024, 13] and fusing the
    // reshuffling of those features into following layers, such as convolution
    // or fully connected, where it can be implemented as compile-time weights
    // shuffling.
    //
    // Requirements - may work incorrectly if not fulfilled:
    // - formats are selected
    // - implementations aren't selected
    //
    // Soft requirements - reduce applicability if not fulfilled:
    // - constant primitives are reduced to data nodes
    // - no fused primitives
public:
    concat_input_order() : base_pass("concat_input_order") {}
    void run(program& p) override;
};
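
// A minimal standalone sketch (added illustration, not clDNN code) of why the
// reorder above helps: in fsv16, features are stored in blocks of 16, so each
// concat input should start at a feature offset that is a multiple of 16. The
// helper below is hypothetical and shown for illustration only:
//
//     #include <cstddef>
//     #include <vector>
//
//     static bool all_inputs_fsv16_aligned(const std::vector<size_t>& features) {
//         size_t offset = 0;                        // feature offset of the next input
//         for (size_t f : features) {
//             if (offset % 16 != 0) return false;   // input would start mid-block
//             offset += f;
//         }
//         return true;
//     }
//
//     // all_inputs_fsv16_aligned({13, 1024}) == false  (second input starts at 13)
//     // all_inputs_fsv16_aligned({1024, 13}) == true   (inputs start at 0 and 1024)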

class memory_dependency_pass : public base_pass {
public:
    explicit memory_dependency_pass(const std::string& pass_name) : base_pass(pass_name) {}
    void add_memory_dependency(program_node* node, program_node* dep) {
        if (node->can_be_optimized() || !dep->can_be_optimized()) {
            node->add_memory_dependency(dep->id());
        } else {
            if (node->id() == dep->id()) {
                return;
            }
            for (auto subdep : dep->get_dependencies()) {
                add_memory_dependency(node, subdep);
                add_memory_dependency(subdep, node);
            }
        }
    }
};
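
// Added commentary (not in the original header): in add_memory_dependency
// above, when `dep` can be optimized out while `node` cannot, `dep` owns no
// buffer of its own, so the dependency is propagated recursively, in both
// directions, to each of dep's real dependencies. For a hypothetical chain
// A -> R -> B, where R is an optimized-out reorder, add_memory_dependency(A, R)
// therefore records the memory dependency directly between A and B.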

class basic_memory_dependencies : public memory_dependency_pass {
public:
    basic_memory_dependencies() : memory_dependency_pass("basic_memory_dependencies") {}
    void run(program& p) override;
};

class skipped_branch_memory_dependencies : public memory_dependency_pass {
public:
    skipped_branch_memory_dependencies() : memory_dependency_pass("skipped_branch_memory_dependencies") {}
    void run(program& p) override;
};

class oooq_memory_dependencies : public memory_dependency_pass {
public:
    oooq_memory_dependencies() : memory_dependency_pass("oooq_memory_dependencies") {}
    void run(program& p) override;
};

class update_loop_primitive_map : public base_pass {
public:
    update_loop_primitive_map() : base_pass("update_loop_primitive_map") {}

private:
    void run(program& p) override;
};
} // namespace cldnn