[GPU] Fix redefinition variables issue in ExperimentalDetectronROIFeatureExtractor kernel (#9960)
This commit is contained in:
parent
a7910b8869
commit
d5b74b0c6b
@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// Copyright (C) 2021-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
@ -37,10 +37,8 @@ namespace {
|
||||
return result;
|
||||
}
|
||||
|
||||
const std::string level_ptrs = "level_ptrs";
|
||||
|
||||
std::string GetDefinedLevelPtrs(size_t levels_num) {
|
||||
std::string result = "const __global INPUT1_TYPE* " + level_ptrs + "[" + std::to_string(levels_num) + "] = {" + common_level_name + "1";
|
||||
std::string result = "(const __global INPUT1_TYPE*[]){" + common_level_name + "1";
|
||||
for (size_t i = 1; i < levels_num; i++) {
|
||||
result += ", " + common_level_name + std::to_string(i + 1);
|
||||
}
|
||||
@ -48,10 +46,8 @@ namespace {
|
||||
return result;
|
||||
}
|
||||
|
||||
const std::string spatial_scales = "spatial_scales";
|
||||
|
||||
std::string GetDefinedSpatialScales(const std::vector<int64_t>& scales, size_t levels_num) {
|
||||
std::string result = "__constant float " + spatial_scales + "[" + std::to_string(levels_num) + "] = {" + std::to_string(1.0f / scales[0]);
|
||||
std::string result = "(float[]){" + std::to_string(1.0f / scales[0]);
|
||||
for (size_t i = 1; i < levels_num; i++) {
|
||||
result += ", " + std::to_string(1.0f / scales[i]);
|
||||
}
|
||||
@ -59,10 +55,8 @@ namespace {
|
||||
return result;
|
||||
}
|
||||
|
||||
const std::string level_sizes = "level_sizes";
|
||||
|
||||
std::string GetDefinedLevelSizes(size_t levels_num) {
|
||||
std::string result = "__constant int " + level_sizes + "[" + std::to_string(3 * levels_num) +"] = {INPUT1_SIZE_Y, INPUT1_SIZE_X, INPUT1_OFFSET";
|
||||
std::string result = "(size_t[]){INPUT1_SIZE_Y, INPUT1_SIZE_X, INPUT1_OFFSET";
|
||||
std::string idx = "";
|
||||
for (size_t i = 1; i < levels_num; i++) {
|
||||
idx = std::to_string(i + 1);
|
||||
@ -83,12 +77,9 @@ JitConstants ExperimentalDetectronROIFeatureExtractorRef::GetJitConstants(const
|
||||
MakeJitConstant("IS_ALIGNED", params.aligned),
|
||||
MakeJitConstant("NUM_PYRAMID_LEVELS", levels_num),
|
||||
MakeJitConstant("INPUT_LEVEL_PARAMS", GetInputLevelParams(levels_num)),
|
||||
MakeJitConstant("LEVEL_PTRS", level_ptrs),
|
||||
MakeJitConstant("DEFINE_LEVEL_PTRS", GetDefinedLevelPtrs(levels_num)),
|
||||
MakeJitConstant("SPATIAL_SCALES", spatial_scales),
|
||||
MakeJitConstant("DEFINE_SPATIAL_SCALES", GetDefinedSpatialScales(params.pyramid_scales, levels_num)),
|
||||
MakeJitConstant("LEVEL_SIZES", level_sizes),
|
||||
MakeJitConstant("DEFINE_LEVEL_SIZES", GetDefinedLevelSizes(levels_num))});
|
||||
MakeJitConstant("LEVEL_PTRS", GetDefinedLevelPtrs(levels_num)),
|
||||
MakeJitConstant("SPATIAL_SCALES", GetDefinedSpatialScales(params.pyramid_scales, levels_num)),
|
||||
MakeJitConstant("LEVEL_SIZES", GetDefinedLevelSizes(levels_num))});
|
||||
|
||||
return jit;
|
||||
}
|
||||
|
@ -1,13 +1,10 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// Copyright (C) 2021-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
DEFINE_SPATIAL_SCALES;
|
||||
DEFINE_LEVEL_SIZES;
|
||||
|
||||
inline int FUNC(get_pyramid_level_for_roi)(const __global INPUT0_TYPE* current_roi) {
|
||||
const INPUT0_TYPE canonical_scale = 224.0;
|
||||
const int canonical_level = 2;
|
||||
@ -42,7 +39,6 @@ KERNEL(experimental_detectron_roi_feature_extractor_ref)(const __global INPUT0_T
|
||||
|
||||
const int level = FUNC_CALL(get_pyramid_level_for_roi)(current_roi_ptr);
|
||||
|
||||
DEFINE_LEVEL_PTRS;
|
||||
const __global INPUT1_TYPE* current_level_ptr = LEVEL_PTRS[level];
|
||||
|
||||
INPUT0_TYPE offset = IS_ALIGNED ? TO_INPUT0_TYPE(0.5f) : TO_INPUT0_TYPE(0.0);
|
||||
|
@ -156,3 +156,125 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) {
|
||||
EXPECT_FLOAT_EQ(expected_second_output[i], second_output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a single topology that contains two experimental_detectron_roi_feature_extractor
// primitives with a different number of inputs (one pyramid level vs. two pyramid levels),
// executes it once, and verifies both outputs of each primitive.
// NOTE(review): presumably this guards against name clashes between the two generated
// kernels when they end up in the same network — confirm against the kernel sources.
TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, multiple_feature_extractor_op_with_different_number_of_inputs) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
// Parameters shared by both primitives; only the pyramid scales (and thus the number
// of level inputs) differ between the first and the second instance.
const int rois_num = 2;
|
||||
const int rois_feature_dim = 4;
|
||||
const int output_dim = 3;
|
||||
const std::vector<int64_t> pyramid_scales_first_instance = {4};
|
||||
const std::vector<int64_t> pyramid_scales_second_instance = {4, 224};
|
||||
const int sampling_ratio = 2;
|
||||
const bool aligned = false;
|
||||
// ROI buffers hold rois_num x rois_feature_dim floats; the second-output buffers are
// allocated with the same layout as the ROI inputs.
auto roi_input_first_instance = engine.allocate_memory({data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
|
||||
auto roi_input_second_instance = engine.allocate_memory({data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
|
||||
auto level_1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 2, 3, 2}});
|
||||
auto level_2 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 2, 3, 2}});
|
||||
auto second_output_first_instance = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
|
||||
auto second_output_second_instance = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
|
||||
|
||||
// Two ROIs per instance, four values each.
std::vector<float> rois_first_instance {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
std::vector<float> rois_second_instance {0.0f, 56.0f, 112.0f, 168.0f, 4.0f, 5.0f, 6.0f, 7.0f};
|
||||
set_values(roi_input_first_instance, rois_first_instance);
|
||||
set_values(roi_input_second_instance, rois_second_instance);
|
||||
// level_2 holds the same twelve values as level_1 with the two six-element halves swapped.
set_values(level_1, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f});
|
||||
set_values(level_2, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
|
||||
|
||||
// ---- First instance: ROIs + one pyramid level + mutable second-output buffer. ----
const std::string input_rois_first_instance_id = "InputRois1";
|
||||
const std::string input_level_1_first_instance_id = "InputLevel_first_instance";
|
||||
const std::string second_output_w_first_instance_id = "second_output_w_first_instance";
|
||||
const std::string second_output_r_first_instance_id = "second_output_r_first_instance";
|
||||
const std::string feature_extractor_first_instance_id = "experimental_detectron_roi_feature_extractor_1";
|
||||
const std::string activation_abs_first_instance_id = "activation_abs_first_instance";
|
||||
topology topology;
|
||||
topology.add(input_layout(input_rois_first_instance_id, roi_input_first_instance->get_layout()));
|
||||
topology.add(input_layout(input_level_1_first_instance_id, level_1->get_layout()));
|
||||
// The "w" mutable_data wraps the second-output buffer and is passed as the primitive's last input.
topology.add(mutable_data(second_output_w_first_instance_id, second_output_first_instance));
|
||||
topology.add(experimental_detectron_roi_feature_extractor(feature_extractor_first_instance_id,
|
||||
{input_rois_first_instance_id, input_level_1_first_instance_id, second_output_w_first_instance_id},
|
||||
output_dim,
|
||||
pyramid_scales_first_instance,
|
||||
sampling_ratio,
|
||||
aligned));
|
||||
// The first output is read through an abs activation placed after the primitive.
topology.add(activation(activation_abs_first_instance_id, feature_extractor_first_instance_id, activation_func::abs));
|
||||
// The "r" mutable_data wraps the same buffer but depends on the primitive, so it can be read as a network output.
topology.add(mutable_data(second_output_r_first_instance_id, {feature_extractor_first_instance_id}, second_output_first_instance));
|
||||
|
||||
// ---- Second instance: ROIs + two pyramid levels + mutable second-output buffer. ----
const std::string input_rois_second_instance_id = "InputRois2";
|
||||
const std::string input_level_1_second_instance_id = "InputLevel1_second_instance";
|
||||
const std::string input_level_2_second_instance_id = "InputLevel2_second_instance";
|
||||
const std::string second_output_w_second_instance_id = "second_output_w_second_instance";
|
||||
const std::string second_output_r_second_instance_id = "second_output_r_second_instance";
|
||||
const std::string feature_extractor_second_instance_id = "experimental_detectron_roi_feature_extractor_2";
|
||||
const std::string activation_abs_second_instance_id = "activation_abs_second_instance";
|
||||
topology.add(input_layout(input_rois_second_instance_id, roi_input_second_instance->get_layout()));
|
||||
topology.add(input_layout(input_level_1_second_instance_id, level_1->get_layout()));
|
||||
topology.add(input_layout(input_level_2_second_instance_id, level_2->get_layout()));
|
||||
topology.add(mutable_data(second_output_w_second_instance_id, second_output_second_instance));
|
||||
topology.add(experimental_detectron_roi_feature_extractor(feature_extractor_second_instance_id,
|
||||
{input_rois_second_instance_id, input_level_1_second_instance_id, input_level_2_second_instance_id, second_output_w_second_instance_id},
|
||||
output_dim,
|
||||
pyramid_scales_second_instance,
|
||||
sampling_ratio,
|
||||
aligned));
|
||||
topology.add(activation(activation_abs_second_instance_id, feature_extractor_second_instance_id, activation_func::abs));
|
||||
topology.add(mutable_data(second_output_r_second_instance_id, {feature_extractor_second_instance_id}, second_output_second_instance));
|
||||
|
||||
// Both primitives live in the same network and are executed together.
network network(engine, topology);
|
||||
|
||||
// level_1 is bound to both instances; level_2 only to the second one.
network.set_input_data(input_rois_first_instance_id, roi_input_first_instance);
|
||||
network.set_input_data(input_rois_second_instance_id, roi_input_second_instance);
|
||||
network.set_input_data(input_level_1_first_instance_id, level_1);
|
||||
network.set_input_data(input_level_1_second_instance_id, level_1);
|
||||
network.set_input_data(input_level_2_second_instance_id, level_2);
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
// ---- First instance, first output (36 values, read after the abs activation). ----
std::vector<float> expected_first_output_first_instance {1.416667f, 1.75f, 2.083333f, 2.416667f, 2.75f, 3.083333f, 3.166667f, 3.5f, 3.833333f,
|
||||
7.416667f, 7.75f, 8.083333f, 8.416667f, 8.75f, 9.083334f, 9.166666f, 9.5f, 9.833334f,
|
||||
4.166667f, 4.5f, 4.833333f, 4.166667f, 4.5f, 4.833333f, 2.083333f, 2.25f, 2.416667f,
|
||||
10.16667f, 10.5f, 10.83333f, 10.16667f, 10.5f, 10.83333f, 5.083333f, 5.25f, 5.416667f};
|
||||
|
||||
auto first_network_output_first_instance = outputs.at(activation_abs_first_instance_id).get_memory();
|
||||
cldnn::mem_lock<float> first_output_ptr_first_instance(first_network_output_first_instance, get_test_stream());
|
||||
|
||||
// Size mismatch aborts the test (ASSERT); element mismatches are reported individually (EXPECT).
ASSERT_EQ(expected_first_output_first_instance.size(), first_output_ptr_first_instance.size());
|
||||
for (std::size_t i = 0; i < expected_first_output_first_instance.size(); i++) {
|
||||
EXPECT_FLOAT_EQ(expected_first_output_first_instance[i], first_output_ptr_first_instance[i]);
|
||||
}
|
||||
|
||||
// ---- First instance, second output: expected to equal the input ROIs. ----
std::vector<float>& expected_second_output_first_instance = rois_first_instance;
|
||||
|
||||
auto second_network_output_first_instance = outputs.at(second_output_r_first_instance_id).get_memory();
|
||||
// The network output must be the very buffer allocated above, not a copy.
EXPECT_TRUE(engine.is_the_same_buffer(*second_output_first_instance, *second_network_output_first_instance));
|
||||
cldnn::mem_lock<float> second_output_ptr_first_instance(second_network_output_first_instance, get_test_stream());
|
||||
|
||||
ASSERT_EQ(expected_second_output_first_instance.size(), second_output_ptr_first_instance.size());
|
||||
for (std::size_t i = 0; i < expected_second_output_first_instance.size(); i++) {
|
||||
EXPECT_FLOAT_EQ(expected_second_output_first_instance[i], second_output_ptr_first_instance[i]);
|
||||
}
|
||||
|
||||
// ---- Second instance, first output. The last 18 expected values are identical to the
// first instance's; the first 18 differ. ----
std::vector<float> expected_first_output_second_instance {7.41662f, 7.7499523f, 8.0832853f, 8.41662f, 8.74995f, 9.0832853f, 9.16664f, 9.49998f, 9.83331f,
|
||||
1.4166187f, 1.7499521f, 2.0832853f, 2.4166186f, 2.7499518f, 3.0832853f, 3.1666427f, 3.4999762f, 3.83331f,
|
||||
4.166667f, 4.5f, 4.833333f, 4.166667f, 4.5f, 4.833333f, 2.083333f, 2.25f, 2.416667f,
|
||||
10.16667f, 10.5f, 10.83333f, 10.16667f, 10.5f, 10.83333f, 5.083333f, 5.25f, 5.416667f};
|
||||
|
||||
auto first_network_output_second_instance = outputs.at(activation_abs_second_instance_id).get_memory();
|
||||
cldnn::mem_lock<float> first_output_ptr_second_instance(first_network_output_second_instance, get_test_stream());
|
||||
|
||||
ASSERT_EQ(expected_first_output_second_instance.size(), first_output_ptr_second_instance.size());
|
||||
for (std::size_t i = 0; i < expected_first_output_second_instance.size(); i++) {
|
||||
EXPECT_FLOAT_EQ(expected_first_output_second_instance[i], first_output_ptr_second_instance[i]);
|
||||
}
|
||||
|
||||
// ---- Second instance, second output: expected to equal the input ROIs. ----
std::vector<float>& expected_second_output_second_instance = rois_second_instance;
|
||||
|
||||
auto second_network_output_second_instance = outputs.at(second_output_r_second_instance_id).get_memory();
|
||||
EXPECT_TRUE(engine.is_the_same_buffer(*second_output_second_instance, *second_network_output_second_instance));
|
||||
cldnn::mem_lock<float> second_output_ptr_second_instance(second_network_output_second_instance, get_test_stream());
|
||||
|
||||
ASSERT_EQ(expected_second_output_second_instance.size(), second_output_ptr_second_instance.size());
|
||||
for (std::size_t i = 0; i < expected_second_output_second_instance.size(); i++) {
|
||||
EXPECT_FLOAT_EQ(expected_second_output_second_instance[i], second_output_ptr_second_instance[i]);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user