[GPU] Fix setting impl type / format issue on dGPU (#18345)

* [GPU] Add roi_align get_shape_infer_dependencies (#18345)

* [GPU] Fix concat cpu impl for buffer fusing case

* [GPU] Add roi_align shape_infer unit tests

* [GPU] Fix windows build issue

* [GPU] Add unit test
This commit is contained in:
Paul Youngsoo Ahn
2023-07-07 18:04:14 +09:00
committed by GitHub
parent 2b795afc09
commit da84027b72
4 changed files with 133 additions and 2 deletions

View File

@@ -65,8 +65,11 @@ struct concatenation_impl : public typed_primitive_impl<concatenation> {
"[GPU] Couldn't create concat operation: unsupported mixed inputs/output data types");
std::vector<memory::ptr> input_mem_ptrs;
for (size_t i = 0; i < instance.dependencies().size(); i++)
input_mem_ptrs.push_back(instance.dep_memory_ptr(i));
for (size_t i = 0; i < instance.dependencies().size(); i++) {
auto& dep = instance.dependencies().at(i);
if (dep.first->get_output_layout().count() > 0)
input_mem_ptrs.push_back(instance.dep_memory_ptr(i));
}
auto output_mem_ptr = instance.output_memory_ptr();

View File

@@ -8,6 +8,17 @@
namespace cldnn {
/// Program-node specialization for the roi_align primitive.
template <>
struct typed_program_node<roi_align> : public typed_program_node_base<roi_align> {
    using parent = typed_program_node_base<roi_align>;

    /// Forwards the primitive and the owning program to the base node.
    typed_program_node(const std::shared_ptr<roi_align> prim, program& prog)
        : parent(prim, prog) {}

    // Also expose the constructors declared by the base class.
    using parent::parent;

    /// Returns the dependency stored at index 0.
    program_node& input() const {
        return get_dependency(0);
    }

    /// Reports an empty list of shape-inference dependencies.
    std::vector<size_t> get_shape_infer_dependencies() const override {
        return std::vector<size_t>{};
    }
};

/// Short alias for the roi_align program node.
using roi_align_node = typed_program_node<roi_align>;
template <>

View File

@@ -223,3 +223,50 @@ TEST(mark_shape_of_subgraphs, simple_chain_w_inserted_reorder) {
ASSERT_TRUE(check_subgraph(prog->get_node("shape_of"), prog->get_node("eltwise")));
}
// Builds gather(shape_of(input)) plus two shape_of(input_empty) branches feeding one concat,
// verifies every shape_of is marked as part of the subgraph ending at "concat", then executes
// the network and expects a single output element equal to 5.
TEST(mark_shape_of_subgraphs, concat_with_empty_tensor_inputs) {
    auto& engine = get_test_engine();

    // "input" is [?, 4] f32; "input_empty" uses a default-constructed PartialShape.
    auto dynamic_in_layout = layout{ov::PartialShape{ov::Dimension::dynamic(), 4}, data_types::f32, format::bfyx};
    auto empty_in_layout = layout{ov::PartialShape{}, data_types::f32, format::bfyx};

    // One-element i64 constant holding the index passed to gather.
    auto gather_index_mem = engine.allocate_memory({ ov::PartialShape{1}, data_types::i64, format::bfyx });
    set_values(gather_index_mem, {0});

    topology topo;
    topo.add(input_layout("input", dynamic_in_layout));
    topo.add(input_layout("input_empty", empty_in_layout));
    topo.add(data("data_0", gather_index_mem));
    topo.add(shape_of("shape_of_01", input_info("input"), data_types::i64));
    topo.add(gather("gather01", input_info("shape_of_01"), input_info("data_0"), 0, {1}));
    topo.add(shape_of("shape_of_02", input_info("input_empty"), data_types::i64));
    topo.add(shape_of("shape_of_03", input_info("input_empty"), data_types::i64));
    topo.add(concatenation("concat", {input_info("gather01"), input_info("shape_of_02"), input_info("shape_of_03")}, 0));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);

    auto compiled = net.get_program();
    ASSERT_NE(compiled, nullptr);

    // Each shape_of node must pass the subgraph check against "concat".
    for (const char* shape_of_id : {"shape_of_01", "shape_of_02", "shape_of_03"}) {
        ASSERT_TRUE(check_subgraph(compiled->get_node(shape_of_id), compiled->get_node("concat"), {{"concat", 3}}));
    }

    // Runtime shape of "input" is [5, 4].
    auto dynamic_in_mem = engine.allocate_memory({ov::PartialShape{5, 4}, data_types::f32, format::bfyx});
    set_values(dynamic_in_mem, {10.f});
    net.set_input_data("input", dynamic_in_mem);

    auto empty_in_mem = engine.allocate_memory(empty_in_layout);
    net.set_input_data("input_empty", empty_in_mem);

    auto results = net.execute();
    auto out_mem = results.begin()->second.get_memory();
    cldnn::mem_lock<int64_t> out_values(out_mem, get_test_stream());

    // Only the gather branch is expected to contribute an element to the concat output.
    const auto element_count = out_mem->get_layout().count();
    ASSERT_EQ(1, element_count);
    for (size_t idx = 0; idx < element_count; ++idx) {
        ASSERT_EQ(5, out_values[idx]);
    }
}

View File

@@ -0,0 +1,70 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/bucketize.hpp>
#include <intel_gpu/primitives/data.hpp>
#include "roi_align_inst.h"
#include "program_wrapper.h"
#include <cmath>
#include <algorithm>
using namespace cldnn;
using namespace ::tests;
namespace shape_infer_tests {
// Parameters for one roi_align shape-inference test case.
// Members are filled positionally by the test-suite instantiation, so their order matters.
struct roi_align_test_params {
// Layout of the first input ("input0"); its data type and format are reused for the other inputs.
layout input_layout;
// First dimension of the "input1" ([num_roi, 4]) and "input2" ([num_roi]) layouts.
// NOTE(review): the cases pass -1 here, presumably to request a dynamic dimension - confirm.
int num_roi;
// Passed to the roi_align primitive constructor together with pooled_w.
int pooled_h;
// Passed to the roi_align primitive constructor together with pooled_h.
int pooled_w;
// Layout the test expects calc_output_layouts to return as its only result.
layout expected_layout;
};
// Value-parameterized GoogleTest fixture; each instance receives one roi_align_test_params.
class roi_align_test : public testing::TestWithParam<roi_align_test_params> { };
// Wires three input_layout nodes into a roi_align node inside a bare program and compares
// the single layout returned by roi_align_inst::calc_output_layouts with the expected one.
TEST_P(roi_align_test, shape_infer) {
    const auto params = GetParam();
    auto& engine = get_test_engine();

    // The ROI inputs reuse the data type and format of the first input.
    const auto rois_layout = layout{ov::PartialShape{params.num_roi, 4}, params.input_layout.data_type, params.input_layout.format};
    const auto batch_idx_layout = layout{ov::PartialShape{params.num_roi}, params.input_layout.data_type, params.input_layout.format};

    auto in0_prim = std::make_shared<input_layout>("input0", params.input_layout);
    auto in1_prim = std::make_shared<input_layout>("input1", rois_layout);
    auto in2_prim = std::make_shared<input_layout>("input2", batch_idx_layout);

    auto prim_inputs = std::vector<input_info>{ input_info("input0"), input_info("input1"), input_info("input2")};
    auto roi_prim = std::make_shared<roi_align>("output", prim_inputs,
                                                params.pooled_h, params.pooled_w, 2, 1.0f,
                                                roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn);

    cldnn::program prog(engine);

    auto& in0_node = prog.get_or_create(in0_prim);
    auto& in1_node = prog.get_or_create(in1_prim);
    auto& in2_node = prog.get_or_create(in2_prim);
    auto& roi_node = prog.get_or_create(roi_prim);

    // Connect the inputs in the same order they are listed on the primitive.
    program_wrapper::add_connection(prog, in0_node, roi_node);
    program_wrapper::add_connection(prog, in1_node, roi_node);
    program_wrapper::add_connection(prog, in2_node, roi_node);

    auto inferred = roi_align_inst::calc_output_layouts<ov::PartialShape>(roi_node, *roi_node.get_kernel_impl_params());

    ASSERT_EQ(inferred.size(), 1);
    ASSERT_EQ(inferred[0], params.expected_layout);
}
// Four cases: static or fully dynamic rank-4 input, combined with num_roi of 10 or -1.
// Field order per entry: input layout, num_roi, pooled_h, pooled_w, expected layout.
INSTANTIATE_TEST_SUITE_P(smoke, roi_align_test,
    testing::ValuesIn(std::vector<roi_align_test_params>{
        // Static f16 input, num_roi = -1.
        {
            layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
            -1, 3, 3,
            layout{ov::PartialShape{-1, 2, 3, 3}, data_types::f16, format::bfyx}
        },
        // Static f32 input, num_roi = 10.
        {
            layout{ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx},
            10, 2, 2,
            layout{ov::PartialShape{10, 2, 2, 2}, data_types::f32, format::bfyx}
        },
        // Dynamic rank-4 f16 input, num_roi = 10.
        {
            layout{ov::PartialShape::dynamic(4), data_types::f16, format::bfyx},
            10, 7, 7,
            layout{ov::PartialShape{10, -1, 7, 7}, data_types::f16, format::bfyx}
        },
        // Dynamic rank-4 f32 input, num_roi = -1.
        {
            layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx},
            -1, 2, 2,
            layout{ov::PartialShape{-1, -1, 2, 2}, data_types::f32, format::bfyx}
        }
    }));
} // shape_infer_tests