From 864b5075b72e44cb3f618164bf909d89a5251cb9 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Wed, 1 Feb 2023 21:32:49 -0800 Subject: [PATCH] Allocate output to host if it is to be used by other node's shape infer dependency, because it requires copy to host in shape inference. (#15386) --- .../src/graph/include/activation_inst.h | 2 ++ .../src/graph/include/arg_max_min_inst.h | 2 ++ .../src/graph/include/convolution_inst.h | 3 +++ .../intel_gpu/src/graph/include/cum_sum_inst.h | 2 ++ .../graph/include/deformable_convolution_inst.h | 2 ++ .../src/graph/include/gather_tree_inst.h | 11 +++++++++++ .../src/graph/include/non_max_suppression_inst.h | 2 ++ .../intel_gpu/src/graph/include/normalize_inst.h | 1 + .../intel_gpu/src/graph/include/program_node.h | 16 ++++++++++++++++ .../intel_gpu/src/graph/include/reduce_inst.h | 10 ++++++++++ .../intel_gpu/src/graph/include/select_inst.h | 1 + .../intel_gpu/src/graph/layout_optimizer.cpp | 12 +++--------- .../intel_gpu/src/graph/primitive_inst.cpp | 6 +++++- src/plugins/intel_gpu/src/graph/program.cpp | 4 +++- 14 files changed, 63 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/activation_inst.h b/src/plugins/intel_gpu/src/graph/include/activation_inst.h index 2ee052d777c..ebcf3c4734b 100644 --- a/src/plugins/intel_gpu/src/graph/include/activation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/activation_inst.h @@ -25,6 +25,8 @@ public: program_node& input() const { return get_dependency(0); } program_node& slope_input() const { return get_dependency(1); } + std::vector get_shape_infer_dependencies() const override { return {}; } + bool is_parameterized() const { return !typed_desc()->additional_params_input.empty(); } std::shared_ptr get_fuse_params() const override { diff --git a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h index 2c6f5f87ddd..e1cf70c3d3e 100644 --- a/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/arg_max_min_inst.h @@ -24,6 +24,8 @@ public: } bool has_second_output() const { return get_output_nums() == 2; } bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; } + + std::vector get_shape_infer_dependencies() const override { return {1}; } }; using arg_max_min_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h index b935f3aa75b..4c19437ce16 100644 --- a/src/plugins/intel_gpu/src/graph/include/convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convolution_inst.h @@ -92,6 +92,9 @@ public: bool compensation_term() const { return get_primitive()->compensation.size() > 0; } bool activations_zero_points_term() const { return get_primitive()->activations_zero_points.size() > 0; } + // Currently convolution with constant weight is only supported for dynamic shape + std::vector get_shape_infer_dependencies() const override { return {}; } + using parent::get_kernel_impl_params; std::unique_ptr get_kernel_impl_params(const std::vector& in_layouts, const std::vector& out_layouts) const override { auto params = parent::get_kernel_impl_params(in_layouts, out_layouts); diff --git a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h index 1c71612829b..2f5538e46e2 100644 --- a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h @@ -16,6 +16,8 @@ public: program_node& input(size_t index = 0) const { return get_dependency(index); } size_t inputs_count() const { return get_dependencies().size(); } + + std::vector get_shape_infer_dependencies() const override { return {}; } }; using cum_sum_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h index c9ff4bc433d..08816057192 100644 --- a/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/deformable_convolution_inst.h @@ -42,6 +42,8 @@ public: return params; } + std::vector get_shape_infer_dependencies() const override { return {}; } + private: bool transposed; uint32_t groups; diff --git a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h index dad91a9c1e0..9200f3a7982 100644 --- a/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gather_tree_inst.h @@ -10,6 +10,17 @@ #include namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + typed_program_node(const std::shared_ptr prim, program& prog) : parent(prim, prog) {} + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {}; } +}; using gather_tree_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h index 5110a76c1b8..b279b14f865 100644 --- a/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/non_max_suppression_inst.h @@ -80,6 +80,8 @@ public: return get_dependency(offset); } bool use_multiple_outputs() const { return get_primitive()->output_size() == 3; } + + std::vector get_shape_infer_dependencies() const override { return {2}; } }; using non_max_suppression_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h index b98bca32ffb..3c741c9ab08 100644 --- a/src/plugins/intel_gpu/src/graph/include/normalize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/normalize_inst.h @@ -19,6 +19,7 @@ public: program_node& input() const { return get_dependency(0); } program_node& scale() const { return get_dependency(1); } + std::vector get_shape_infer_dependencies() const override { return {}; } }; using normalize_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 18063ca0500..8042bebb57a 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -83,6 +83,22 @@ public: return res; } + bool is_shape_infer_dep(void) const { + if (!myprog.get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) + return false; + for (auto u : users) { + for (auto dep_idx : u->get_shape_infer_dependencies()) { + if (u->get_dependencies().size() <= dep_idx) { + continue; + } + if (u->get_dependency(dep_idx).get_unique_id() == unique_id) { + return true; + } + } + } + return false; + } + std::map get_const_memory_deps() const; virtual std::unique_ptr get_kernel_impl_params() const { diff --git a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h index 619cca9eb89..19f174338c6 100644 --- a/src/plugins/intel_gpu/src/graph/include/reduce_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reduce_inst.h @@ -9,7 +9,17 @@ #include namespace cldnn { +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + typed_program_node(const std::shared_ptr prim, program& prog) : parent(prim, prog) {} +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {}; } +}; using reduce_node = typed_program_node; template <> diff --git a/src/plugins/intel_gpu/src/graph/include/select_inst.h b/src/plugins/intel_gpu/src/graph/include/select_inst.h index e51b1a177b8..ce6b9d67109 100644 --- a/src/plugins/intel_gpu/src/graph/include/select_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/select_inst.h @@ -18,6 +18,7 @@ public: program_node& input(size_t idx = 0) const { return get_dependency(idx); } size_t inputs_count() const { return get_dependencies().size(); } + std::vector get_shape_infer_dependencies() const override { return {}; } }; using select_node = typed_program_node