[GPU] Fix input feature map indexing with pad and batch indices for ROIAlign (#19511)
* [GPU] Fix input feature map indexing with pad and batch indices for ROIAlign * Fix failed TCs for ov_gpu_func_tests Signed-off-by: Andrew Park <andrew.park@intel.com> * Fix to do batch interpretation for inconsistency between ROIALign input and const 1D tensor Signed-off-by: Andrew Park <andrew.park@intel.com> --------- Signed-off-by: Andrew Park <andrew.park@intel.com>
This commit is contained in:
committed by
GitHub
parent
441adcc122
commit
92c6316e8e
@@ -9,11 +9,11 @@
|
||||
#define POOLED_WIDTH OUTPUT_SIZE_X
|
||||
#define POOLED_HEIGHT OUTPUT_SIZE_Y
|
||||
|
||||
KERNEL(roi_align_ref)
|
||||
(const __global INPUT0_TYPE* src_data,
|
||||
__global OUTPUT_TYPE* dst_data,
|
||||
const __global INPUT1_TYPE* src_rois,
|
||||
const __global INPUT2_TYPE* src_batches) {
|
||||
KERNEL(roi_align_ref)(const __global INPUT0_TYPE* src_data,
|
||||
__global OUTPUT_TYPE* dst_data,
|
||||
const __global INPUT1_TYPE* src_rois,
|
||||
const __global INPUT2_TYPE* src_batches)
|
||||
{
|
||||
const size_t i = get_global_id(0);
|
||||
|
||||
const uint x = i % POOLED_WIDTH;
|
||||
@@ -23,6 +23,9 @@ KERNEL(roi_align_ref)
|
||||
|
||||
const __global INPUT1_TYPE* roi_ptr = &src_rois[INPUT1_GET_INDEX(r, 0, 0, 0)];
|
||||
|
||||
// Get the batch index of feature map
|
||||
const uint b = (uint)src_batches[INPUT2_GET_INDEX(r, 0, 0, 0)];
|
||||
|
||||
// Get ROI`s corners
|
||||
const INPUT1_TYPE x1 =
|
||||
(roi_ptr[0] + (INPUT1_TYPE)OFFSET_SRC) * (INPUT1_TYPE)SPATIAL_SCALE + (INPUT1_TYPE)OFFSET_DST;
|
||||
@@ -46,8 +49,6 @@ KERNEL(roi_align_ref)
|
||||
const INPUT1_TYPE sample_distance_x = bin_width / (INPUT1_TYPE)sampling_ratio_x;
|
||||
const INPUT1_TYPE sample_distance_y = bin_height / (INPUT1_TYPE)sampling_ratio_y;
|
||||
|
||||
const __global INPUT0_TYPE* data = src_data + INPUT0_GET_INDEX(r, c, 0, 0);
|
||||
|
||||
OUTPUT_TYPE pooled_value = 0;
|
||||
for (unsigned int y_sample_ind = 0; y_sample_ind < sampling_ratio_y; y_sample_ind++) {
|
||||
INPUT1_TYPE sample_y =
|
||||
@@ -91,10 +92,10 @@ KERNEL(roi_align_ref)
|
||||
weight_right = INPUT1_VAL_ONE - weight_left;
|
||||
}
|
||||
|
||||
const INPUT0_TYPE top_left = data[INPUT0_GET_INDEX(0, 0, sample_y_low, sample_x_low)];
|
||||
const INPUT0_TYPE top_right = data[INPUT0_GET_INDEX(0, 0, sample_y_low, sample_x_high)];
|
||||
const INPUT0_TYPE bottom_left = data[INPUT0_GET_INDEX(0, 0, sample_y_high, sample_x_low)];
|
||||
const INPUT0_TYPE bottom_right = data[INPUT0_GET_INDEX(0, 0, sample_y_high, sample_x_high)];
|
||||
const INPUT0_TYPE top_left = src_data[INPUT0_GET_INDEX(b, c, sample_y_low, sample_x_low)];
|
||||
const INPUT0_TYPE top_right = src_data[INPUT0_GET_INDEX(b, c, sample_y_low, sample_x_high)];
|
||||
const INPUT0_TYPE bottom_left = src_data[INPUT0_GET_INDEX(b, c, sample_y_high, sample_x_low)];
|
||||
const INPUT0_TYPE bottom_right = src_data[INPUT0_GET_INDEX(b, c, sample_y_high, sample_x_high)];
|
||||
|
||||
const INPUT0_TYPE interpolated_value =
|
||||
weight_bottom * weight_right * top_left + weight_bottom * weight_left * top_right +
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/split.hpp"
|
||||
#include "openvino/op/prelu.hpp"
|
||||
#include "openvino/op/roi_align.hpp"
|
||||
#include "openvino/op/variadic_split.hpp"
|
||||
#include "openvino/op/util/op_types.hpp"
|
||||
|
||||
@@ -180,6 +181,8 @@ static void CreateConstantOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0
|
||||
if (constDims.size() == 4 && input_shape.size() == 3) { // In case of weight dim 4 and input dim 3,
|
||||
constDims.push_back(1); // The weight cldnn tensor adds 1d to the end as the input cldnn tensor does
|
||||
}
|
||||
} else if (ov::is_type<ov::op::v3::ROIAlign>(outOp) || ov::is_type<ov::op::v9::ROIAlign>(outOp)) {
|
||||
consts[op].needsBatchInterpretation = constDims.size() == 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -199,3 +199,71 @@ TYPED_TEST(roi_align_test, max_half_pixel_cached) {
|
||||
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, true);
|
||||
}
|
||||
|
||||
TEST(roi_align_gpu_fp32, bfyx_inpad_1x1) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const int pooled_h{2};
|
||||
const int pooled_w{2};
|
||||
const int sampling_ratio{2};
|
||||
const float spatial_scale{1};
|
||||
|
||||
const std::vector<float> input_data = {
|
||||
0.f, 1.f, 8.f, 5.f, 5.f, 2.f, 0.f, 7.f, 7.f, 10.f, 4.f,
|
||||
5.f, 9.f, 0.f, 0.f, 5.f, 7.f, 0.f, 4.f, 0.f, 4.f, 7.f,
|
||||
6.f, 10.f, 9.f, 5.f, 1.f, 7.f, 4.f, 7.f, 10.f, 8.f, 2.f,
|
||||
0.f, 8.f, 3.f, 6.f, 8.f, 10.f, 4.f, 2.f, 10.f, 7.f, 8.f,
|
||||
7.f, 0.f, 6.f, 9.f, 2.f, 4.f, 8.f, 5.f, 2.f, 3.f, 3.f,
|
||||
1.f, 5.f, 9.f, 10.f, 0.f, 9.f, 5.f, 5.f, 3.f, 10.f, 5.f,
|
||||
2.f, 0.f, 10.f, 0.f, 5.f, 4.f, 3.f, 10.f, 5.f, 5.f, 10.f,
|
||||
0.f, 8.f, 8.f, 9.f, 1.f, 0.f, 7.f, 9.f, 6.f, 8.f, 7.f,
|
||||
10.f, 9.f, 2.f, 3.f, 3.f, 5.f, 6.f, 9.f, 4.f, 9.f, 2.f,
|
||||
4.f, 5.f, 5.f, 3.f, 1.f, 1.f, 6.f, 8.f, 0.f, 5.f, 5.f,
|
||||
10.f, 8.f, 6.f, 9.f, 6.f, 9.f, 1.f, 2.f, 7.f, 1.f, 1.f,
|
||||
3.f, 0.f, 4.f, 0.f, 7.f, 10.f, 2.f
|
||||
};
|
||||
const std::vector<float> coords_data = {2.f, 2.f, 4.f, 4.f, 2.f, 2.f, 4.f, 4.f};
|
||||
const std::vector<int32_t> roi_data = {0, 1};
|
||||
|
||||
auto input = engine.allocate_memory({ov::PartialShape{2, 1, 8, 8}, data_types::f32, format::bfyx});
|
||||
auto coords = engine.allocate_memory({ov::PartialShape{2, 4, 1, 1}, data_types::f32, format::bfyx});
|
||||
auto roi_ind = engine.allocate_memory({ov::PartialShape{2, 1, 1, 1}, data_types::i32, format::bfyx});
|
||||
set_values(input, input_data);
|
||||
set_values(coords, coords_data);
|
||||
set_values(roi_ind, roi_data);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(input_layout("coords", coords->get_layout()));
|
||||
topology.add(input_layout("roi_ind", roi_ind->get_layout()));
|
||||
topology.add(reorder("reorder_input", input_info("input"), input->get_layout().with_padding(padding{ {0,0,1,1},0 })));
|
||||
topology.add(roi_align("roi_align",
|
||||
{ input_info("reorder_input"), input_info("coords"), input_info("roi_ind") },
|
||||
pooled_h,
|
||||
pooled_w,
|
||||
sampling_ratio,
|
||||
spatial_scale,
|
||||
roi_align::PoolingMode::avg,
|
||||
roi_align::AlignedMode::asymmetric));
|
||||
topology.add(reorder("out", input_info("roi_align"), format::bfyx, data_types::f32));
|
||||
|
||||
network network(engine, topology, get_test_default_config(engine));
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("coords", coords);
|
||||
network.set_input_data("roi_ind", roi_ind);
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("out").get_memory();
|
||||
|
||||
std::vector<float> expected_output = {
|
||||
3.f, 3.75f, 4.75f, 5.f, 3.f, 5.5f, 2.75f, 3.75f
|
||||
};
|
||||
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
ASSERT_EQ(output_ptr.size(), expected_output.size());
|
||||
for (uint32_t i = 0; i < expected_output.size(); ++i) {
|
||||
ASSERT_EQ(output_ptr[i], expected_output[i]);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user