[GPU] Need to exclude fused mem_dep from shape_infer_dep (#16300)
commit 6bf2fe11ae
parent 951c5fdae9
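Background for the change: when primitives are fused into a node, their extra inputs are appended after the node's original dependencies, starting at the fused entry's dep_start_idx. Shape inference should read memory only from the node's real dependencies, so any dependency index at or beyond the first fused primitive's dep_start_idx has to be skipped. Below is a standalone toy sketch of that index check; apart from is_fused_dep and dep_start_idx, the struct and member names are made up for illustration and are not the actual cldnn types.

// Toy sketch, not the actual cldnn classes: illustrates why indices at or
// beyond dep_start_idx must be excluded from shape-infer memory deps.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct fused_primitive_desc_toy {
    size_t dep_start_idx;  // index of the first dependency owned by this fused op
};

struct node_toy {
    std::vector<std::string> deps;                     // e.g. {"input", "weights", "elt_input"}
    std::vector<fused_primitive_desc_toy> fused_prims; // e.g. {{2}} after fusing the eltwise
    std::vector<size_t> shape_infer_deps;              // indices the shape-infer pass wants to read

    bool is_fused_dep(size_t dep_idx) const {
        for (const auto& fused : fused_prims) {
            if (dep_idx >= fused.dep_start_idx)
                return true;
        }
        return false;
    }

    // Mirrors the intent of get_const_memory_deps(): keep only real,
    // non-fused dependencies as candidate constant memory inputs.
    std::vector<size_t> const_memory_dep_candidates() const {
        std::vector<size_t> result;
        for (size_t i : shape_infer_deps) {
            if (i >= deps.size())
                continue;
            if (is_fused_dep(i))  // the fix: skip inputs that belong to fused ops
                continue;
            result.push_back(i);
        }
        return result;
    }
};

int main() {
    node_toy deconv{{"input", "weights", "elt_input"}, {{2}}, {1, 2}};
    for (size_t i : deconv.const_memory_dep_candidates())
        std::cout << "usable shape-infer dep: " << deconv.deps[i] << "\n";  // prints only "weights"
    return 0;
}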
@@ -91,6 +91,9 @@ public:
         if (u->get_dependencies().size() <= dep_idx) {
             continue;
         }
+        if (u->is_fused_dep(dep_idx)) {
+            continue;
+        }
         if (u->get_dependency(dep_idx).get_unique_id() == unique_id) {
             return true;
         }
@@ -99,6 +102,8 @@ public:
         return false;
     }

+    bool is_fused_dep(size_t dep_idx) const;
+
     std::map<size_t, memory::ptr> get_const_memory_deps() const;

     virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params() const {
@@ -190,6 +190,10 @@ void primitive_inst::update_shape() {
         }
         auto& dep = _node->get_dependency(i);
         auto dep_id = dep.id();
+        // exclude fused node from memory_deps
+        if (_node->is_fused_dep(i)) {
+            break;
+        }
         // Events may be not created for in-order queue, so take them for OOO queue only
         if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) {
             dependencies_events.push_back(_network.get_primitive_event(dep_id));
@@ -378,6 +378,16 @@ bool program_node::has_padded_dependency() const {
     });
 }

+bool program_node::is_fused_dep(size_t dep_idx) const {
+    for (auto fused : get_fused_primitives()) {
+        if (dep_idx >= fused.dep_start_idx) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 std::map<size_t, memory::ptr> program_node::get_const_memory_deps() const {
     std::map<size_t, memory::ptr> mem_deps;
     for (auto& i : get_shape_infer_dependencies()) {
@@ -385,6 +395,12 @@ std::map<size_t, memory::ptr> program_node::get_const_memory_deps() const {
         if (i >= get_dependencies().size())
             continue;

+        // exclude fused dependency
+        if (is_fused_dep(i)) {
+            continue;
+        }
+
+        // constant type only
         auto& dep = get_dependency(i);
         if (dep.is_type<data>()) {
             mem_deps.insert({i, dep.as<data>().get_attached_memory_ptr()});
@@ -7,6 +7,7 @@
 #include <intel_gpu/primitives/input_layout.hpp>
 #include <intel_gpu/primitives/deconvolution.hpp>
 #include <intel_gpu/primitives/crop.hpp>
+#include <intel_gpu/primitives/eltwise.hpp>
 #include <intel_gpu/primitives/reorder.hpp>
 #include <intel_gpu/primitives/data.hpp>

@@ -258,6 +259,78 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
     }
 }

+TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad_exclude_fused_mem_dep) {
+    // Filter : 2x2
+    // Input  : 2x2
+    // Output : 3x3
+    //
+    // Input:
+    // 8   0.5
+    // 6   9
+    //
+    // Filter:
+    // -2    0.5
+    //  3.5  1.5
+    //
+    // no bias
+    //
+    // Output:
+    // -16.f, 3.f, 0.25f,
+    // 16.f, -1.25f, 5.25f,
+    // 21.f, 40.5f, 13.5f
+
+    auto& engine = get_test_engine();
+
+    auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
+    auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } });
+    auto elt_input = engine.allocate_memory({ data_types::f32, format::yxfb, { 9, 1, 1, 1 } });
+    auto in_layout = layout(ov::PartialShape::dynamic(4), data_types::f32, format::yxfb);
+
+    set_values(input, { 8.f, 0.5f, 6.f, 9.f });
+    set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f });
+    set_values(elt_input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });
+
+    topology topology(
+        input_layout("input", in_layout),
+        input_layout("elt_input", elt_input->get_layout()),
+        reorder("reordered_input", input_info("input"), this->input_layout_format, data_types::f32),
+        reorder("reordered_elt_input", input_info("elt_input"), format::bfyx, data_types::f32),
+        data("weights", weights),
+        deconvolution("deconv", input_info("reordered_input"), { "weights" }),
+        eltwise("elt_scale", { input_info("deconv"), input_info("reordered_elt_input") }, eltwise_mode::prod),
+        reorder("plane_output", input_info("elt_scale"), format::bfyx, data_types::f32)
+    );
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+
+    network network(engine, topology, config);
+    network.set_input_data("input", input);
+    network.set_input_data("elt_input", elt_input);
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "plane_output");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    cldnn::mem_lock<float> output_ptr(output_prim, get_test_stream());
+
+    std::vector<float> expected_output_vec = {
+        -16.f, 3.f, 0.25f,
+        16.f, -1.25f, 5.25f,
+        21.f, 40.5f, 13.5f
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++) {
+        ASSERT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
 TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2
     // Input : 2x2
     // Output : 3x3
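For reference, the expected_output_vec in the new test is the plain transposed convolution of the 2x2 input with the 2x2 filter; the fused eltwise multiplies by an all-ones tensor, so it leaves the values unchanged. A minimal standalone check of those numbers (plain C++, independent of cldnn; variable names are illustrative only):

// Verifies the 3x3 expected output of the 2x2 deconvolution used in the test.
#include <cassert>
#include <cmath>
#include <vector>

int main() {
    const int in_h = 2, in_w = 2, k_h = 2, k_w = 2;
    std::vector<float> input  = { 8.f, 0.5f, 6.f, 9.f };      // row-major 2x2
    std::vector<float> filter = { -2.0f, 0.5f, 3.5f, 1.5f };  // row-major 2x2
    const int out_h = in_h + k_h - 1, out_w = in_w + k_w - 1; // 3x3, stride 1, no padding
    std::vector<float> out(out_h * out_w, 0.f);

    // Transposed convolution: each input element scatters a scaled copy of
    // the filter into the output at its own offset.
    for (int y = 0; y < in_h; ++y)
        for (int x = 0; x < in_w; ++x)
            for (int ky = 0; ky < k_h; ++ky)
                for (int kx = 0; kx < k_w; ++kx)
                    out[(y + ky) * out_w + (x + kx)] += input[y * in_w + x] * filter[ky * k_w + kx];

    std::vector<float> expected = { -16.f, 3.f, 0.25f,
                                     16.f, -1.25f, 5.25f,
                                     21.f, 40.5f, 13.5f };
    for (size_t i = 0; i < expected.size(); ++i)
        assert(std::fabs(out[i] - expected[i]) < 1e-5f);
    return 0;
}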