Allocate output to host if it is to be used as another node's shape-infer dependency, because shape inference requires a copy to host. (#15386)
commit 864b5075b7
parent 29b15233c7
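Summary of the change: shape inference runs on the host, so any primitive output that a downstream node reads as a shape-infer dependency must be placed in host-accessible (lockable) memory instead of device-only USM. The following minimal standalone sketch shows the intent of the new selection logic in primitive_inst::allocate_output; the enum and function names here are illustrative stand-ins, not the real cldnn types.

#include <iostream>

// Stand-ins for illustration only; the plugin's real types are
// cldnn::allocation_type and the engine-selected lockable type.
enum class alloc_type { lockable_host, usm_device };

// Sketch of the selection added in primitive_inst::allocate_output():
// device-only memory is used only when nothing forces host visibility.
alloc_type choose_alloc_type(bool use_lockable_memory,
                             bool usm_device_allocatable,
                             bool is_shape_infer_dep) {
    if (use_lockable_memory)     return alloc_type::lockable_host;  // output buffer / CPU user
    if (!usm_device_allocatable) return alloc_type::lockable_host;  // device USM unsupported
    if (is_shape_infer_dep)      return alloc_type::lockable_host;  // read on host by shape inference
    return alloc_type::usm_device;
}

int main() {
    // An output consumed by shape inference ends up host-accessible
    // even though device USM is available.
    std::cout << (choose_alloc_type(false, true, true) == alloc_type::lockable_host) << "\n";  // prints 1
}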
@@ -25,6 +25,8 @@ public:
     program_node& input() const { return get_dependency(0); }
     program_node& slope_input() const { return get_dependency(1); }

+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+
     bool is_parameterized() const { return !typed_desc()->additional_params_input.empty(); }

     std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
@@ -24,6 +24,8 @@ public:
     }
     bool has_second_output() const { return get_output_nums() == 2; }
     bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; }
+
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
 };

 using arg_max_min_node = typed_program_node<arg_max_min>;
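A note on the values returned above and in the hunks below: get_shape_infer_dependencies() lists the dependency indices whose buffer contents shape inference must read by value. Most of the primitives touched here return {} (input shapes alone are enough), while arg_max_min returns {1} and non_max_suppression returns {2} because those inputs carry values (e.g. a top-k count) that determine the output shape. A hedged, self-contained sketch of that contract follows; the class names are illustrative, not cldnn's.

#include <cstddef>
#include <vector>

// Illustrative contract, not the real cldnn class hierarchy: every node type
// reports which dependency slots shape inference reads by value.
struct shape_infer_contract {
    virtual ~shape_infer_contract() = default;
    // Default: output shape depends only on input shapes, not on input values.
    virtual std::vector<size_t> get_shape_infer_dependencies() const { return {}; }
};

// Hypothetical TopK-like node: its second input (index 1) holds the k value,
// so that buffer must be readable during shape inference.
struct top_k_like_node : shape_infer_contract {
    std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
};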
@@ -92,6 +92,9 @@ public:
     bool compensation_term() const { return get_primitive()->compensation.size() > 0; }
     bool activations_zero_points_term() const { return get_primitive()->activations_zero_points.size() > 0; }

+    // Currently convolution with constant weight is only supported for dynamic shape
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+
     using parent::get_kernel_impl_params;
     std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const override {
         auto params = parent::get_kernel_impl_params(in_layouts, out_layouts);
@@ -16,6 +16,8 @@ public:

     program_node& input(size_t index = 0) const { return get_dependency(index); }
     size_t inputs_count() const { return get_dependencies().size(); }
+
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

 using cum_sum_node = typed_program_node<cum_sum>;
@@ -42,6 +42,8 @@ public:
         return params;
     }

+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+
private:
    bool transposed;
    uint32_t groups;
@@ -10,6 +10,17 @@
 #include <memory>

 namespace cldnn {
+template <>
+struct typed_program_node<gather_tree> : public typed_program_node_base<gather_tree> {
+    using parent = typed_program_node_base<gather_tree>;
+    typed_program_node(const std::shared_ptr<gather_tree> prim, program& prog) : parent(prim, prog) {}
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+};

 using gather_tree_node = typed_program_node<gather_tree>;

@@ -80,6 +80,8 @@ public:
         return get_dependency(offset);
     }
     bool use_multiple_outputs() const { return get_primitive()->output_size() == 3; }
+
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {2}; }
 };

 using non_max_suppression_node = typed_program_node<non_max_suppression>;
@@ -19,6 +19,7 @@ public:

     program_node& input() const { return get_dependency(0); }
     program_node& scale() const { return get_dependency(1); }
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

 using normalize_node = typed_program_node<normalize>;
@@ -83,6 +83,22 @@ public:
         return res;
     }

+    bool is_shape_infer_dep(void) const {
+        if (!myprog.get_config().get_property(ov::intel_gpu::allow_new_shape_infer))
+            return false;
+        for (auto u : users) {
+            for (auto dep_idx : u->get_shape_infer_dependencies()) {
+                if (u->get_dependencies().size() <= dep_idx) {
+                    continue;
+                }
+                if (u->get_dependency(dep_idx).get_unique_id() == unique_id) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
     std::map<size_t, memory::ptr> get_const_memory_deps() const;

     virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params() const {
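The helper added above answers a single question: is this node's output read by any of its users during shape inference? A simplified standalone model of that lookup follows, with a plain struct standing in for program_node; the field names are illustrative only.

#include <cstddef>
#include <vector>

// Minimal stand-in for a graph node; not cldnn's program_node.
struct node {
    size_t unique_id;
    std::vector<const node*> deps;            // dependency order matters
    std::vector<size_t> shape_infer_dep_idx;  // indices into deps read during shape inference
    std::vector<const node*> users;
};

// Mirrors the walk in is_shape_infer_dep(): true if any user reads this node
// through one of its shape-infer dependency slots.
bool is_shape_infer_dep(const node& n) {
    for (const node* u : n.users) {
        for (size_t idx : u->shape_infer_dep_idx) {
            if (idx < u->deps.size() && u->deps[idx]->unique_id == n.unique_id)
                return true;
        }
    }
    return false;
}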
@@ -9,7 +9,17 @@
 #include <string>

 namespace cldnn {
+template <>
+struct typed_program_node<reduce> : public typed_program_node_base<reduce> {
+    using parent = typed_program_node_base<reduce>;
+    typed_program_node(const std::shared_ptr<reduce> prim, program& prog) : parent(prim, prog) {}

+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+};
 using reduce_node = typed_program_node<reduce>;

 template <>
@@ -18,6 +18,7 @@ public:

     program_node& input(size_t idx = 0) const { return get_dependency(idx); }
     size_t inputs_count() const { return get_dependencies().size(); }
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

 using select_node = typed_program_node<select>;
@@ -1684,15 +1684,9 @@ format layout_optimizer::get_preferred_format(program_node& node) {
             node.set_preferred_input_fmt(0, get_preferred_format(node.get_dependency(0)));

         // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is
-        for (auto u : node.get_users()) {
-            for (auto dep_idx : u->get_shape_infer_dependencies()) {
-                if (u->get_dependencies().size() <= dep_idx)
-                    continue;
-                if (u->get_dependency(dep_idx).get_unique_id() == node.get_unique_id()) {
-                    expected = format::get_default_format(output_layout.get_rank(), false, false);
-                    return expected;
-                }
-            }
+        if (node.is_shape_infer_dep()) {
+            expected = format::get_default_format(output_layout.get_rank(), false, false);
+            return expected;
         }
     }
     if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
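In get_preferred_format the open-coded walk over users is folded into the new helper: when a node feeds a shape-infer dependency, its output keeps the default plain format for its rank, since shape inference reads the raw buffer as-is without any layout conversion. A rough illustration of "plain format by rank" follows; the mapping is written in the spirit of format::get_default_format, and the exact strings should be read as labels, not as the cldnn::format enum.

#include <cstddef>
#include <string>

// Illustrative only: map a tensor rank to a plain, non-blocked layout tag
// in the spirit of cldnn::format::get_default_format(rank).
std::string default_plain_format(size_t rank) {
    switch (rank) {
        case 5:  return "bfzyx";
        case 6:  return "bfwzyx";
        default: return "bfyx";   // ranks up to 4 collapse to bfyx
    }
}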
@@ -776,6 +776,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
         return pool.get_memory(static_layout, type);
     };

+
     auto layout = impl_params.get_output_layout(idx);
     OPENVINO_ASSERT(layout.is_static() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout");
     auto device_mem_acc = [&](size_t a, const cldnn::layout& l) {
@@ -804,8 +805,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
     auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) ||
                                !_engine.supports_allocation(allocation_type::usm_device);
     const auto& lockable_mem_type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d());
+
+    // If this node is to be used as shape infer, it needs to copy data to be used by shape infer.
     auto alloc_type = use_lockable_memory ? lockable_mem_type
-                    : usm_device_allocatable ? allocation_type::usm_device : lockable_mem_type;
+                    : !usm_device_allocatable ? lockable_mem_type :
+                      !_node.is_shape_infer_dep() ? allocation_type::usm_device : lockable_mem_type;

     if ((is_internal && (_node.can_be_optimized() || _node.is_type<generic_layer>())) || (memory_reuse_by_user == false)) {
         GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl;
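The rewritten ternary above is the operative change: a node whose output is a shape-infer dependency no longer gets usm_device memory even when it could, because the CPU-side shape inference has to read those bytes. The sketch below shows the kind of data-dependent inference that forces this, using a hypothetical reshape-style helper rather than any real plugin API.

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical example of data-dependent shape inference: the output shape is
// taken from the *values* of an input tensor, so that tensor must be readable
// on the host (a device-only allocation would first require a copy back).
std::vector<size_t> infer_reshape_like_shape(const int64_t* target_shape, size_t rank) {
    std::vector<size_t> out;
    out.reserve(rank);
    for (size_t i = 0; i < rank; ++i)
        out.push_back(static_cast<size_t>(target_shape[i]));
    return out;
}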
@@ -738,13 +738,15 @@ void program::transfer_memory_to_device() {
         return;

     for (auto& node : processing_order) {
+        if (node->is_shape_infer_dep()) {
+            continue;
+        }
         if (node->is_type<data>() && !node->need_lockable_memory()) {
             auto& data_node = node->as<data>();
             auto data_node_layout = data_node.get_output_layout();
             auto& mem = data_node.get_attached_memory();
             auto mem_layout = mem.get_layout();
             auto alloc_type = mem.get_allocation_type();

             if (!mem_layout.compatible(data_node_layout)) {
                 std::string err_str("Node and memory layouts are incompatible, error occurred for " + node->id() + " node");
                 throw std::invalid_argument(err_str);
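Finally, transfer_memory_to_device now skips shape-infer dependencies, so constant data consumed by shape inference stays in host-visible memory instead of being migrated to device-only allocations. A compact restatement of the loop's conditions follows, using plain structs with illustrative field names rather than the real program_node API.

// Illustrative restatement of the early-continue conditions in
// program::transfer_memory_to_device(); the field names are stand-ins.
struct node_flags {
    bool is_constant_data;
    bool needs_lockable_memory;
    bool is_shape_infer_dep;
};

bool may_move_to_device(const node_flags& n) {
    // Constant data moves to device memory only if nothing requires it to
    // remain readable on the host.
    return n.is_constant_data && !n.needs_lockable_memory && !n.is_shape_infer_dep;
}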