[GPU] Fix nonzero issue in constant propagate (#16933)
* Fix gather_nonzero not to be marked as constant. Even though count nonzero is to be turned into a constant, gather nonzero still cannot infer shape at the moment of propagate constant. * Apply the fix only for gather_non_zero
This commit is contained in:
parent
9f3bc22e7a
commit
824a5aa7fb
@ -23,6 +23,7 @@
|
||||
#include "roi_pooling_inst.h"
|
||||
#include "reorg_yolo_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "non_zero_inst.h"
|
||||
#include "softmax_inst.h"
|
||||
#include "permute_inst.h"
|
||||
#include "custom_gpu_primitive_inst.h"
|
||||
@ -631,7 +632,7 @@ void program::post_optimize_graph(bool is_internal) {
|
||||
// mark if the node is constant assuming that all dependencies are marked properly
|
||||
void program::mark_if_constant(program_node& node) {
|
||||
if (node.get_dependencies().empty() || node.is_type<prior_box>() ||
|
||||
node.is_type<assign>() || node.is_type<read_value>()) {
|
||||
node.is_type<assign>() || node.is_type<read_value>() || node.is_type<gather_nonzero>()) {
|
||||
return;
|
||||
}
|
||||
node.constant = true;
|
||||
|
@ -482,3 +482,51 @@ TEST(test_gather_non_zero, not_use_local_mem) {
|
||||
ASSERT_EQ(expected_results[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(non_zero_gpu, const_input) {
|
||||
auto& engine = get_test_engine();
|
||||
ov::Shape in_shape = { 3, 3 };
|
||||
auto input_data_mem = engine.allocate_memory(layout{ov::PartialShape(in_shape), data_types::f32, format::bfyx});
|
||||
|
||||
std::vector<float> input_data = {
|
||||
3.f, 0.f, 0.f,
|
||||
0.f, 4.f, 0.f,
|
||||
5.f, 6.f, 0.f,
|
||||
};
|
||||
set_values(input_data_mem, input_data);
|
||||
|
||||
std::vector<int32_t> out_data = {
|
||||
0, 1, 2, 2, 0, 1, 0, 1
|
||||
};
|
||||
|
||||
topology topology;
|
||||
topology.add(data("InputData", input_data_mem));
|
||||
topology.add(count_nonzero("count_nonzero", input_info("InputData")));
|
||||
topology.add(gather_nonzero("gather_nonzero", input_info("InputData"), input_info("count_nonzero")));
|
||||
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
network network(engine, topology, config);
|
||||
|
||||
auto count_nonzero_inst = network.get_primitive("count_nonzero");
|
||||
auto count_nonzero_impl = count_nonzero_inst->get_impl();
|
||||
ASSERT_TRUE(count_nonzero_impl == nullptr);
|
||||
// count nonzero should be optimized out from propagate_constant pass
|
||||
ASSERT_TRUE(count_nonzero_inst->get_node().is_type<data>());
|
||||
|
||||
auto gather_nonzero_inst = network.get_primitive("gather_nonzero");
|
||||
auto gather_nonzero_impl = gather_nonzero_inst->get_impl();
|
||||
ASSERT_TRUE(gather_nonzero_impl != nullptr);
|
||||
ASSERT_TRUE(gather_nonzero_impl->is_dynamic());
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("gather_nonzero").get_memory();
|
||||
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
|
||||
|
||||
ASSERT_EQ(output_ptr.size(), (uint32_t)8);
|
||||
for (uint32_t i = 0; i < out_data.size(); ++i) {
|
||||
ASSERT_FLOAT_EQ(output_ptr[i], out_data[i]);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user