Allocate output on host if it is used as another node's shape-infer dependency, because shape inference requires a copy to host. (#15386)

Taylor Yeonbok Lee 2023-02-01 21:32:49 -08:00 committed by GitHub
parent 29b15233c7
commit 864b5075b7
14 changed files with 63 additions and 11 deletions
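
For context, the core idea of the detection helper this commit adds to program_node: a node counts as a shape-infer dependency if any of its users lists that input's index in get_shape_infer_dependencies(). The sketch below is a simplified stand-in, not the actual cldnn class; the real method additionally checks the allow_new_shape_infer config property and uses the program_node API shown in the diff further down.

#include <cstddef>
#include <vector>

// Simplified stand-in node type; only the members needed for the check are modeled.
struct node {
    std::size_t unique_id = 0;
    std::vector<node*> users;                       // nodes that consume this node's output
    std::vector<node*> dependencies;                // this node's inputs
    std::vector<std::size_t> shape_infer_dep_idx;   // input indices read during shape inference

    // Mirrors the idea of the new is_shape_infer_dep(): true if any user reads
    // this node's output during shape inference (which forces a copy to host).
    bool is_shape_infer_dep() const {
        for (const node* u : users) {
            for (std::size_t dep_idx : u->shape_infer_dep_idx) {
                if (dep_idx >= u->dependencies.size())
                    continue;  // declared index exceeds the user's actual input count
                if (u->dependencies[dep_idx]->unique_id == unique_id)
                    return true;
            }
        }
        return false;
    }
};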

View File

@@ -25,6 +25,8 @@ public:
program_node& input() const { return get_dependency(0); }
program_node& slope_input() const { return get_dependency(1); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
bool is_parameterized() const { return !typed_desc()->additional_params_input.empty(); }
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {

View File

@@ -24,6 +24,8 @@ public:
}
bool has_second_output() const { return get_output_nums() == 2; }
bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; }
std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
};
using arg_max_min_node = typed_program_node<arg_max_min>;

View File

@@ -92,6 +92,9 @@ public:
bool compensation_term() const { return get_primitive()->compensation.size() > 0; }
bool activations_zero_points_term() const { return get_primitive()->activations_zero_points.size() > 0; }
// Currently, dynamic shape is only supported for convolution with constant weights
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
using parent::get_kernel_impl_params;
std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const override {
auto params = parent::get_kernel_impl_params(in_layouts, out_layouts);

View File

@@ -16,6 +16,8 @@ public:
program_node& input(size_t index = 0) const { return get_dependency(index); }
size_t inputs_count() const { return get_dependencies().size(); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};
using cum_sum_node = typed_program_node<cum_sum>;

View File

@@ -42,6 +42,8 @@ public:
return params;
}
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
private:
bool transposed;
uint32_t groups;

View File

@@ -10,6 +10,17 @@
#include <memory>
namespace cldnn {
template <>
struct typed_program_node<gather_tree> : public typed_program_node_base<gather_tree> {
using parent = typed_program_node_base<gather_tree>;
typed_program_node(const std::shared_ptr<gather_tree> prim, program& prog) : parent(prim, prog) {}
public:
using parent::parent;
program_node& input(size_t index = 0) const { return get_dependency(index); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};
using gather_tree_node = typed_program_node<gather_tree>;

View File

@@ -80,6 +80,8 @@ public:
return get_dependency(offset);
}
bool use_multiple_outputs() const { return get_primitive()->output_size() == 3; }
std::vector<size_t> get_shape_infer_dependencies() const override { return {2}; }
};
using non_max_suppression_node = typed_program_node<non_max_suppression>;

View File

@@ -19,6 +19,7 @@ public:
program_node& input() const { return get_dependency(0); }
program_node& scale() const { return get_dependency(1); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};
using normalize_node = typed_program_node<normalize>;

View File

@@ -83,6 +83,22 @@ public:
return res;
}
bool is_shape_infer_dep(void) const {
if (!myprog.get_config().get_property(ov::intel_gpu::allow_new_shape_infer))
return false;
for (auto u : users) {
for (auto dep_idx : u->get_shape_infer_dependencies()) {
if (u->get_dependencies().size() <= dep_idx) {
continue;
}
if (u->get_dependency(dep_idx).get_unique_id() == unique_id) {
return true;
}
}
}
return false;
}
std::map<size_t, memory::ptr> get_const_memory_deps() const;
virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params() const {

View File

@@ -9,7 +9,17 @@
#include <string>
namespace cldnn {
template <>
struct typed_program_node<reduce> : public typed_program_node_base<reduce> {
using parent = typed_program_node_base<reduce>;
typed_program_node(const std::shared_ptr<reduce> prim, program& prog) : parent(prim, prog) {}
public:
using parent::parent;
program_node& input(size_t index = 0) const { return get_dependency(index); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};
using reduce_node = typed_program_node<reduce>;
template <>

View File

@@ -18,6 +18,7 @@ public:
program_node& input(size_t idx = 0) const { return get_dependency(idx); }
size_t inputs_count() const { return get_dependencies().size(); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
};
using select_node = typed_program_node<select>;

View File

@@ -1684,15 +1684,9 @@ format layout_optimizer::get_preferred_format(program_node& node) {
node.set_preferred_input_fmt(0, get_preferred_format(node.get_dependency(0)));
// A shape-infer dependency should use a plain format because its memory is read as-is by ngraph shape inference
for (auto u : node.get_users()) {
for (auto dep_idx : u->get_shape_infer_dependencies()) {
if (u->get_dependencies().size() <= dep_idx)
continue;
if (u->get_dependency(dep_idx).get_unique_id() == node.get_unique_id()) {
expected = format::get_default_format(output_layout.get_rank(), false, false);
return expected;
}
}
if (node.is_shape_infer_dep()) {
expected = format::get_default_format(output_layout.get_rank(), false, false);
return expected;
}
}
if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {

View File

@@ -776,6 +776,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
return pool.get_memory(static_layout, type);
};
auto layout = impl_params.get_output_layout(idx);
OPENVINO_ASSERT(layout.is_static() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout");
auto device_mem_acc = [&](size_t a, const cldnn::layout& l) {
@@ -804,8 +805,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) ||
!_engine.supports_allocation(allocation_type::usm_device);
const auto& lockable_mem_type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d());
// If this node's output is used as a shape-infer dependency, its data must be copied to host for shape inference, so prefer lockable memory.
auto alloc_type = use_lockable_memory ? lockable_mem_type
: usm_device_allocatable ? allocation_type::usm_device : lockable_mem_type;
: !usm_device_allocatable ? lockable_mem_type :
!_node.is_shape_infer_dep() ? allocation_type::usm_device : lockable_mem_type;
if ((is_internal && (_node.can_be_optimized() || _node.is_type<generic_layer>())) || (memory_reuse_by_user == false)) {
GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl;
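
A minimal sketch of the allocation-type decision as it reads after this change; the flags and the lockable type are hypothetical stand-ins for the values computed earlier in primitive_inst::allocate_output(), and only the names appearing in the diff above come from the source.

// Hypothetical enum and helper standing in for cldnn's allocation_type and the
// booleans computed in the real allocate_output(); illustrative only.
enum class allocation_type { usm_host, usm_device };

allocation_type choose_alloc_type(bool use_lockable_memory,
                                  bool usm_device_allocatable,
                                  bool is_shape_infer_dep,
                                  allocation_type lockable_mem_type) {
    if (use_lockable_memory)
        return lockable_mem_type;        // output buffer / CPU user: host access required
    if (!usm_device_allocatable)
        return lockable_mem_type;        // engine cannot allocate device-only USM
    if (is_shape_infer_dep)
        return lockable_mem_type;        // new: shape inference needs a host copy of this output
    return allocation_type::usm_device;  // default fast path: keep the output on the device
}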

View File

@@ -738,13 +738,15 @@ void program::transfer_memory_to_device() {
return;
for (auto& node : processing_order) {
if (node->is_shape_infer_dep()) {
continue;
}
if (node->is_type<data>() && !node->need_lockable_memory()) {
auto& data_node = node->as<data>();
auto data_node_layout = data_node.get_output_layout();
auto& mem = data_node.get_attached_memory();
auto mem_layout = mem.get_layout();
auto alloc_type = mem.get_allocation_type();
if (!mem_layout.compatible(data_node_layout)) {
std::string err_str("Node and memory layouts are incompatible, error occurred for " + node->id() + " node");
throw std::invalid_argument(err_str);