[GPU] Blocked layouts support for ExperimentalDetectronGenerateProposalsSingleImage (#12126)
* Blocked layouts support for ExperimentalDetectronGenerateProposalsSingleImage
* fix RHEL 8.2 build
* reduce number of tests
* Added bs_fs_yx_bsv16_fsv32 format
This commit is contained in:
parent
28549a77bf
commit
71c23489c6
@ -19,7 +19,7 @@ layout experimental_detectron_generate_proposals_single_image_inst::calc_output_
|
||||
const layout data_layout = impl_param.get_input_layout();
|
||||
auto desc = impl_param.typed_desc<experimental_detectron_generate_proposals_single_image>();
|
||||
|
||||
return layout(data_layout.data_type, format::bfyx, {static_cast<int>(desc->post_nms_count), 4, 1, 1});
|
||||
return layout(data_layout.data_type, data_layout.format, {static_cast<int>(desc->post_nms_count), 4, 1, 1});
|
||||
}
|
||||
|
||||
std::string experimental_detectron_generate_proposals_single_image_inst::to_string(
|
||||
|
@ -69,11 +69,19 @@ public:
|
||||
|
||||
namespace detail {
|
||||
attach_experimental_detectron_generate_proposals_single_image_impl::attach_experimental_detectron_generate_proposals_single_image_impl() {
|
||||
auto types = {data_types::f16, data_types::f32};
|
||||
auto formats = {
|
||||
format::bfyx,
|
||||
format::b_fs_yx_fsv16,
|
||||
format::b_fs_yx_fsv32,
|
||||
format::bs_fs_yx_bsv16_fsv16,
|
||||
format::bs_fs_yx_bsv16_fsv32,
|
||||
format::bs_fs_yx_bsv32_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv32
|
||||
};
|
||||
|
||||
implementation_map<experimental_detectron_generate_proposals_single_image>::add(impl_types::ocl,
|
||||
experimental_detectron_generate_proposals_single_image_impl::create, {
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfyx)
|
||||
});
|
||||
experimental_detectron_generate_proposals_single_image_impl::create, types, formats);
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "deconvolution_inst.h"
|
||||
#include "detection_output_inst.h"
|
||||
#include "generate_proposals_inst.h"
|
||||
#include "experimental_detectron_generate_proposals_single_image_inst.hpp"
|
||||
#include "input_layout_inst.h"
|
||||
#include "shuffle_channels_inst.h"
|
||||
#include "arg_max_min_inst.h"
|
||||
@ -1522,7 +1523,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::gather_tree::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_detection_output::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_topk_rois::type_id() &&
|
||||
prim.type() != cldnn::convert_color::type_id()) {
|
||||
prim.type() != cldnn::convert_color::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
|
||||
can_use_fsv16 = false;
|
||||
}
|
||||
|
||||
@ -1575,7 +1577,9 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::arg_max_min::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_topk_rois::type_id() &&
|
||||
prim.type() != cldnn::multiclass_nms::type_id() &&
|
||||
prim.type() != cldnn::normalize::type_id()) {
|
||||
prim.type() != cldnn::normalize::type_id() &&
|
||||
prim.type() != cldnn::deconvolution::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
|
||||
can_use_bs_fs_yx_bsv16_fsv16 = false;
|
||||
}
|
||||
}
|
||||
|
@ -4,47 +4,43 @@
|
||||
|
||||
#if INPUT0_TYPE_SIZE == 2 //f16
|
||||
#define HALF_ONE 0.5h
|
||||
#define ZERO 0.0h
|
||||
#else
|
||||
#define HALF_ONE 0.5f
|
||||
#define ZERO 0.0f
|
||||
#endif
|
||||
|
||||
#define ZERO INPUT0_VAL_ZERO
|
||||
|
||||
#ifdef EDGPSI_STAGE_0
|
||||
|
||||
# define COORDINATES_OFFSET 1
|
||||
#define COORDINATES_OFFSET INPUT0_VAL_ONE
|
||||
|
||||
// 0. Refine anchors
|
||||
KERNEL(edgpsi_ref_stage_0)
|
||||
(const __global INPUT0_TYPE* im_info,
|
||||
const __global INPUT0_TYPE* anchors,
|
||||
const __global INPUT0_TYPE* deltas,
|
||||
const __global INPUT0_TYPE* scores,
|
||||
__global INPUT0_TYPE* proposals) {
|
||||
const INPUT0_TYPE img_H = im_info[0];
|
||||
const INPUT0_TYPE img_W = im_info[1];
|
||||
const __global INPUT1_TYPE* anchors,
|
||||
const __global INPUT2_TYPE* deltas,
|
||||
const __global INPUT3_TYPE* scores,
|
||||
__global OUTPUT_TYPE* proposals) {
|
||||
const INPUT0_TYPE img_H = im_info[INPUT0_GET_INDEX(0, 0, 0, 0)];
|
||||
const INPUT0_TYPE img_W = im_info[INPUT0_GET_INDEX(1, 0, 0, 0)];
|
||||
|
||||
const uint h = get_global_id(0);
|
||||
const uint w = get_global_id(1);
|
||||
const uint anchor = get_global_id(2);
|
||||
|
||||
const uint offset = h * BOTTOM_W + w;
|
||||
const uint anchor_idx = (offset * ANCHORS_NUM + anchor) * 4;
|
||||
const uint proposal_idx = (offset * ANCHORS_NUM + anchor) * 5;
|
||||
const uint score_idx = offset + BOTTOM_AREA * anchor;
|
||||
const uint delta_idx = offset + BOTTOM_AREA * anchor * 4;
|
||||
const uint anchor_idx = (h * BOTTOM_W + w) * ANCHORS_NUM + anchor;
|
||||
|
||||
INPUT0_TYPE x0 = anchors[anchor_idx + 0];
|
||||
INPUT0_TYPE y0 = anchors[anchor_idx + 1];
|
||||
INPUT0_TYPE x1 = anchors[anchor_idx + 2];
|
||||
INPUT0_TYPE y1 = anchors[anchor_idx + 3];
|
||||
INPUT0_TYPE x0 = anchors[INPUT1_GET_INDEX(anchor_idx, 0, 0, 0)];
|
||||
INPUT0_TYPE y0 = anchors[INPUT1_GET_INDEX(anchor_idx, 1, 0, 0)];
|
||||
INPUT0_TYPE x1 = anchors[INPUT1_GET_INDEX(anchor_idx, 2, 0, 0)];
|
||||
INPUT0_TYPE y1 = anchors[INPUT1_GET_INDEX(anchor_idx, 3, 0, 0)];
|
||||
|
||||
const INPUT0_TYPE dx = deltas[delta_idx + 0 * BOTTOM_AREA];
|
||||
const INPUT0_TYPE dy = deltas[delta_idx + 1 * BOTTOM_AREA];
|
||||
const INPUT0_TYPE d_log_w = deltas[delta_idx + 2 * BOTTOM_AREA];
|
||||
const INPUT0_TYPE d_log_h = deltas[delta_idx + 3 * BOTTOM_AREA];
|
||||
const INPUT0_TYPE dx = deltas[INPUT2_GET_INDEX(anchor * 4 + 0 , h, w, 0)];
|
||||
const INPUT0_TYPE dy = deltas[INPUT2_GET_INDEX(anchor * 4 + 1 , h , w, 0)];
|
||||
const INPUT0_TYPE d_log_w = deltas[INPUT2_GET_INDEX(anchor * 4 + 2 , h, w, 0)];
|
||||
const INPUT0_TYPE d_log_h = deltas[INPUT2_GET_INDEX(anchor * 4 + 3 , h, w, 0)];
|
||||
|
||||
const INPUT0_TYPE score = scores[score_idx];
|
||||
const INPUT0_TYPE score = scores[INPUT3_GET_INDEX(anchor, h, w, 0)];
|
||||
|
||||
// width & height of box
|
||||
const INPUT0_TYPE ww = x1 - x0 + COORDINATES_OFFSET;
|
||||
@ -77,6 +73,7 @@ KERNEL(edgpsi_ref_stage_0)
|
||||
const INPUT0_TYPE box_w = x1 - x0 + COORDINATES_OFFSET;
|
||||
const INPUT0_TYPE box_h = y1 - y0 + COORDINATES_OFFSET;
|
||||
|
||||
const uint proposal_idx = anchor_idx * 5;
|
||||
proposals[proposal_idx + 0] = x0;
|
||||
proposals[proposal_idx + 1] = y0;
|
||||
proposals[proposal_idx + 2] = x1;
|
||||
@ -179,7 +176,7 @@ inline void FUNC(quickSortIterative)(__global Box* arr, int l, int h) {
|
||||
}
|
||||
|
||||
// 1. Sort boxes by scores
|
||||
KERNEL(edgpsi_ref_stage_1)(__global INPUT0_TYPE* proposals) {
|
||||
KERNEL(edgpsi_ref_stage_1)(__global OUTPUT_TYPE* proposals) {
|
||||
__global Box* boxes = (__global Box*)proposals;
|
||||
|
||||
FUNC_CALL(quickSortIterative)(boxes, 0, NUM_PROPOSALS-1);
|
||||
@ -262,17 +259,17 @@ KERNEL(edgpsi_ref_stage_3)
|
||||
const uint rois_offset = i * 4;
|
||||
|
||||
if (i < *num_outputs) {
|
||||
rois[rois_offset + 0] = boxes[box_offset + 0];
|
||||
rois[rois_offset + 1] = boxes[box_offset + 1];
|
||||
rois[rois_offset + 2] = boxes[box_offset + 2];
|
||||
rois[rois_offset + 3] = boxes[box_offset + 3];
|
||||
roi_scores[i] = boxes[box_offset + 4];
|
||||
rois[OUTPUT_GET_INDEX(i, 0, 0, 0)] = boxes[box_offset + 0];
|
||||
rois[OUTPUT_GET_INDEX(i, 1, 0, 0)] = boxes[box_offset + 1];
|
||||
rois[OUTPUT_GET_INDEX(i, 2, 0, 0)] = boxes[box_offset + 2];
|
||||
rois[OUTPUT_GET_INDEX(i, 3, 0, 0)] = boxes[box_offset + 3];
|
||||
roi_scores[INPUT4_GET_INDEX(i, 0, 0, 0)] = boxes[box_offset + 4];
|
||||
} else {
|
||||
rois[rois_offset + 0] = 0.0f;
|
||||
rois[rois_offset + 1] = 0.0f;
|
||||
rois[rois_offset + 2] = 0.0f;
|
||||
rois[rois_offset + 3] = 0.0f;
|
||||
roi_scores[i] = 0.0f;
|
||||
rois[OUTPUT_GET_INDEX(i, 0, 0, 0)] = 0.0f;
|
||||
rois[OUTPUT_GET_INDEX(i, 1, 0, 0)] = 0.0f;
|
||||
rois[OUTPUT_GET_INDEX(i, 2, 0, 0)] = 0.0f;
|
||||
rois[OUTPUT_GET_INDEX(i, 3, 0, 0)] = 0.0f;
|
||||
roi_scores[INPUT4_GET_INDEX(i, 0, 0, 0)] = 0.0f;
|
||||
}
|
||||
}
|
||||
#endif /* EDGPSI_STAGE_3 */
|
||||
|
@ -15,10 +15,11 @@ ParamsKey ExperimentalDetectronGenerateProposalsSingleImageRef::GetSupportedKey(
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableInputLayout(DataLayout::bfyx);
|
||||
k.EnableOutputLayout(DataLayout::bfyx);
|
||||
k.EnableAllInputLayout();
|
||||
k.EnableAllOutputLayout();
|
||||
k.EnableBatching();
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableTensorPitches();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
@ -12,6 +12,20 @@ using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
struct ExperimentalDetectronGenerateProposalsSingleImageParams {
|
||||
float min_size;
|
||||
float nms_threshold;
|
||||
int64_t pre_nms_count;
|
||||
int64_t post_nms_count;
|
||||
std::vector<T> expected_rois;
|
||||
std::vector<T> expected_roi_scores;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using ExperimentalDetectronGenerateProposalsSingleImageParamsWithLayout =
|
||||
std::tuple<ExperimentalDetectronGenerateProposalsSingleImageParams<T>, format::type>;
|
||||
|
||||
constexpr size_t image_height = 150;
|
||||
constexpr size_t image_width = 150;
|
||||
constexpr float image_scale = 1.0f;
|
||||
@ -55,17 +69,15 @@ const std::vector<float> scores{
|
||||
0.5560748, 0.6952493, 0.6732593, 0.3306898, 0.6790913, 0.41128764, 0.34593266, 0.94296855, 0.7348507,
|
||||
0.24478768, 0.94024557, 0.05405676, 0.06466125, 0.36244348, 0.07942984, 0.10619422, 0.09412837, 0.9053611,
|
||||
0.22870538, 0.9237487, 0.20986171, 0.5067282, 0.29709867, 0.53138554, 0.189101, 0.4786443, 0.88421875};
|
||||
}; // namespace
|
||||
|
||||
template <typename T>
|
||||
struct ExperimentalDetectronGenerateProposalsSingleImageParams {
|
||||
float min_size;
|
||||
float nms_threshold;
|
||||
int64_t pre_nms_count;
|
||||
int64_t post_nms_count;
|
||||
std::vector<T> expected_rois;
|
||||
std::vector<T> expected_roi_scores;
|
||||
};
|
||||
const std::vector<format::type> layouts{
|
||||
format::bfyx,
|
||||
format::b_fs_yx_fsv16,
|
||||
format::b_fs_yx_fsv32,
|
||||
format::bs_fs_yx_bsv16_fsv16,
|
||||
format::bs_fs_yx_bsv16_fsv32,
|
||||
format::bs_fs_yx_bsv32_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv32};
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> getValues(const std::vector<float>& values) {
|
||||
@ -85,109 +97,6 @@ float getError<half_t>() {
|
||||
return 0.2;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct experimental_detectron_generate_proposals_single_image_test
|
||||
: public ::testing::TestWithParam<ExperimentalDetectronGenerateProposalsSingleImageParams<T> > {
|
||||
public:
|
||||
void test() {
|
||||
const ExperimentalDetectronGenerateProposalsSingleImageParams<T> param
|
||||
= testing::TestWithParam<ExperimentalDetectronGenerateProposalsSingleImageParams<T> >::GetParam();
|
||||
auto data_type = type_to_data_type<T>::value;
|
||||
|
||||
auto &engine = get_test_engine();
|
||||
|
||||
const primitive_id input_im_info_id = "InputImInfo";
|
||||
const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(3)}});
|
||||
set_values(input_im_info, getValues<T>(im_info));
|
||||
|
||||
const primitive_id input_anchors_id = "InputAnchors";
|
||||
auto input_anchors = engine.allocate_memory(
|
||||
{data_type, format::bfyx, tensor{batch(height * width * number_of_channels), feature(4)}});
|
||||
set_values(input_anchors, getValues<T>(anchors));
|
||||
|
||||
const primitive_id input_deltas_id = "InputDeltas";
|
||||
auto input_deltas = engine.allocate_memory(
|
||||
{data_type, format::bfyx,
|
||||
tensor{batch(number_of_channels * 4), feature(height), spatial(1, width)}});
|
||||
set_values(input_deltas, getValues<T>(deltas));
|
||||
|
||||
const primitive_id input_scores_id = "InputScores";
|
||||
auto input_scores = engine.allocate_memory(
|
||||
{data_type, format::bfyx, tensor{batch(number_of_channels), feature(height), spatial(1, width)}});
|
||||
set_values(input_scores, getValues<T>(scores));
|
||||
|
||||
const primitive_id output_roi_scores_id = "OutputRoiScores";
|
||||
auto output_roi_scores =
|
||||
engine.allocate_memory({data_type, format::bfyx, tensor{batch(param.post_nms_count)}});
|
||||
|
||||
topology topology;
|
||||
|
||||
topology.add(input_layout(input_im_info_id, input_im_info->get_layout()));
|
||||
topology.add(input_layout(input_anchors_id, input_anchors->get_layout()));
|
||||
topology.add(input_layout(input_deltas_id, input_deltas->get_layout()));
|
||||
topology.add(input_layout(input_scores_id, input_scores->get_layout()));
|
||||
topology.add(mutable_data(output_roi_scores_id, output_roi_scores));
|
||||
|
||||
const primitive_id edgpsi_id = "experimental_detectron_generate_proposals_single_image";
|
||||
const auto edgpsi_primitive = experimental_detectron_generate_proposals_single_image{edgpsi_id,
|
||||
input_im_info_id,
|
||||
input_anchors_id,
|
||||
input_deltas_id,
|
||||
input_scores_id,
|
||||
output_roi_scores_id,
|
||||
param.min_size,
|
||||
param.nms_threshold,
|
||||
param.pre_nms_count,
|
||||
param.post_nms_count};
|
||||
topology.add(edgpsi_primitive);
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data(input_im_info_id, input_im_info);
|
||||
network.set_input_data(input_anchors_id, input_anchors);
|
||||
network.set_input_data(input_deltas_id, input_deltas);
|
||||
network.set_input_data(input_scores_id, input_scores);
|
||||
|
||||
const auto outputs = network.execute();
|
||||
|
||||
const auto rois = outputs.at(edgpsi_id).get_memory();
|
||||
|
||||
const cldnn::mem_lock<T> rois_ptr(rois, get_test_stream());
|
||||
ASSERT_EQ(rois_ptr.size(), param.post_nms_count * 4);
|
||||
|
||||
const cldnn::mem_lock<T> roi_scores_ptr(output_roi_scores, get_test_stream());
|
||||
ASSERT_EQ(roi_scores_ptr.size(), param.post_nms_count);
|
||||
|
||||
const auto &expected_roi_scores = param.expected_roi_scores;
|
||||
const auto &expected_rois = param.expected_rois;
|
||||
for (int64_t i = 0; i < param.post_nms_count; ++i) {
|
||||
EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], 0.001) << "i=" << i;
|
||||
|
||||
// order of proposals with zero scores is not guaranteed (to be precise,
|
||||
// it is not guaranteed for any equal score values)
|
||||
if (static_cast<float>(expected_roi_scores[i]) != 0.0f) {
|
||||
for (size_t coord = 0; coord < 4; ++coord) {
|
||||
const auto roi_idx = i * 4 + coord;
|
||||
EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError<T>()) << "i=" << i << ", coord=" << coord;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using experimental_detectron_generate_proposals_single_image_test_f32 = experimental_detectron_generate_proposals_single_image_test<float>;
|
||||
using experimental_detectron_generate_proposals_single_image_test_f16 = experimental_detectron_generate_proposals_single_image_test<half_t>;
|
||||
|
||||
|
||||
TEST_P(experimental_detectron_generate_proposals_single_image_test_f32, basic) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
TEST_P(experimental_detectron_generate_proposals_single_image_test_f16, basic) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<ExperimentalDetectronGenerateProposalsSingleImageParams<T>> getExperimentalDetectronGenerateProposalsSingleImageParams() {
|
||||
std::vector<ExperimentalDetectronGenerateProposalsSingleImageParams<T>> params = {
|
||||
@ -241,12 +150,191 @@ std::vector<ExperimentalDetectronGenerateProposalsSingleImageParams<T>> getExper
|
||||
return params;
|
||||
}
|
||||
|
||||
struct PrintToStringParamName {
|
||||
template<class T>
|
||||
std::string operator()(const testing::TestParamInfo<ExperimentalDetectronGenerateProposalsSingleImageParamsWithLayout<T> > &param) {
|
||||
std::stringstream buf;
|
||||
ExperimentalDetectronGenerateProposalsSingleImageParams<T> p;
|
||||
format::type layout;
|
||||
std::tie(p, layout) = param.param;
|
||||
|
||||
buf << "min_size=" << p.min_size << "_";
|
||||
buf << "nms_threshold=" << p.nms_threshold << "_";
|
||||
buf << "pre_nms_count=" << p.pre_nms_count << "_";
|
||||
buf << "post_nms_count=" << p.post_nms_count << "_";
|
||||
buf << "layout=" << fmt_to_str(layout);
|
||||
return buf.str();
|
||||
}
|
||||
};
|
||||
}; // namespace
|
||||
|
||||
template <typename T>
|
||||
struct experimental_detectron_generate_proposals_single_image_test
|
||||
: public ::testing::TestWithParam<ExperimentalDetectronGenerateProposalsSingleImageParamsWithLayout<T> > {
|
||||
public:
|
||||
void test() {
|
||||
ExperimentalDetectronGenerateProposalsSingleImageParams<T> param;
|
||||
format::type data_layout;
|
||||
std::tie(param, data_layout) = this->GetParam();
|
||||
const auto data_type = type_to_data_type<T>::value;
|
||||
|
||||
auto &engine = get_test_engine();
|
||||
|
||||
const primitive_id input_im_info_id = "InputImInfo";
|
||||
const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(3)}});
|
||||
set_values(input_im_info, getValues<T>(im_info));
|
||||
|
||||
const primitive_id input_anchors_id = "InputAnchors";
|
||||
auto input_anchors = engine.allocate_memory(
|
||||
{data_type, format::bfyx, tensor{batch(height * width * number_of_channels), feature(4)}});
|
||||
set_values(input_anchors, getValues<T>(anchors));
|
||||
|
||||
const primitive_id input_deltas_id = "InputDeltas";
|
||||
auto input_deltas = engine.allocate_memory(
|
||||
{data_type, format::bfyx,
|
||||
tensor{batch(number_of_channels * 4), feature(height), spatial(1, width)}});
|
||||
set_values(input_deltas, getValues<T>(deltas));
|
||||
|
||||
const primitive_id input_scores_id = "InputScores";
|
||||
auto input_scores = engine.allocate_memory(
|
||||
{data_type, format::bfyx, tensor{batch(number_of_channels), feature(height), spatial(1, width)}});
|
||||
set_values(input_scores, getValues<T>(scores));
|
||||
|
||||
const primitive_id output_roi_scores_id = "OutputRoiScores";
|
||||
const layout rois_scores_layout{data_type, data_layout, tensor{batch(param.post_nms_count)}};
|
||||
auto output_roi_scores = engine.allocate_memory(rois_scores_layout);
|
||||
|
||||
const primitive_id reorder_im_info_id = input_im_info_id + "Reordered";
|
||||
const primitive_id reorder_anchors_id = input_anchors_id + "Reordered";
|
||||
const primitive_id reorder_deltas_id = input_deltas_id + "Reordered";
|
||||
const primitive_id reorder_scores_id = input_scores_id + "Reordered";
|
||||
|
||||
topology topology;
|
||||
|
||||
topology.add(input_layout(input_im_info_id, input_im_info->get_layout()));
|
||||
topology.add(input_layout(input_anchors_id, input_anchors->get_layout()));
|
||||
topology.add(input_layout(input_deltas_id, input_deltas->get_layout()));
|
||||
topology.add(input_layout(input_scores_id, input_scores->get_layout()));
|
||||
topology.add(mutable_data(output_roi_scores_id, output_roi_scores));
|
||||
|
||||
topology.add(reorder(reorder_im_info_id, input_im_info_id, data_layout, data_type));
|
||||
topology.add(reorder(reorder_anchors_id, input_anchors_id, data_layout, data_type));
|
||||
topology.add(reorder(reorder_deltas_id, input_deltas_id, data_layout, data_type));
|
||||
topology.add(reorder(reorder_scores_id, input_scores_id, data_layout, data_type));
|
||||
|
||||
const primitive_id edgpsi_id = "experimental_detectron_generate_proposals_single_image";
|
||||
const auto edgpsi_primitive = experimental_detectron_generate_proposals_single_image{edgpsi_id,
|
||||
reorder_im_info_id,
|
||||
reorder_anchors_id,
|
||||
reorder_deltas_id,
|
||||
reorder_scores_id,
|
||||
output_roi_scores_id,
|
||||
param.min_size,
|
||||
param.nms_threshold,
|
||||
param.pre_nms_count,
|
||||
param.post_nms_count};
|
||||
topology.add(edgpsi_primitive);
|
||||
|
||||
const primitive_id reorder_result_id = edgpsi_id + "Reordered";
|
||||
topology.add(reorder(reorder_result_id, edgpsi_primitive, format::bfyx, data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data(input_im_info_id, input_im_info);
|
||||
network.set_input_data(input_anchors_id, input_anchors);
|
||||
network.set_input_data(input_deltas_id, input_deltas);
|
||||
network.set_input_data(input_scores_id, input_scores);
|
||||
|
||||
const auto outputs = network.execute();
|
||||
|
||||
const auto rois = outputs.at(reorder_result_id).get_memory();
|
||||
|
||||
const cldnn::mem_lock<T> rois_ptr(rois, get_test_stream());
|
||||
ASSERT_EQ(rois_ptr.size(), param.post_nms_count * 4);
|
||||
|
||||
cldnn::topology reorder_topology;
|
||||
reorder_topology.add(input_layout("scores", rois_scores_layout));
|
||||
reorder_topology.add(reorder("plane_scores", "scores", format::bfyx, data_type));
|
||||
cldnn::network reorder_net{engine, reorder_topology};
|
||||
reorder_net.set_input_data("scores", output_roi_scores);
|
||||
const auto second_output_result = reorder_net.execute();
|
||||
const auto plane_data_mem = second_output_result.at("plane_scores").get_memory();
|
||||
|
||||
const cldnn::mem_lock<T> roi_scores_ptr(plane_data_mem, get_test_stream());
|
||||
ASSERT_EQ(roi_scores_ptr.size(), param.post_nms_count);
|
||||
|
||||
const auto &expected_roi_scores = param.expected_roi_scores;
|
||||
const auto &expected_rois = param.expected_rois;
|
||||
for (int64_t i = 0; i < param.post_nms_count; ++i) {
|
||||
EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], 0.001) << "i=" << i;
|
||||
|
||||
// order of proposals with zero scores is not guaranteed (to be precise,
|
||||
// it is not guaranteed for any equal score values)
|
||||
if (static_cast<float>(expected_roi_scores[i]) != 0.0f) {
|
||||
for (size_t coord = 0; coord < 4; ++coord) {
|
||||
const auto roi_idx = i * 4 + coord;
|
||||
EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError<T>()) << "i=" << i << ", coord=" << coord;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using experimental_detectron_generate_proposals_single_image_test_f32 = experimental_detectron_generate_proposals_single_image_test<float>;
|
||||
TEST_P(experimental_detectron_generate_proposals_single_image_test_f32, basic) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
using experimental_detectron_generate_proposals_single_image_test_f16 = experimental_detectron_generate_proposals_single_image_test<half_t>;
|
||||
TEST_P(experimental_detectron_generate_proposals_single_image_test_f16, basic) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
experimental_detectron_generate_proposals_single_image_gpu_test,
|
||||
experimental_detectron_generate_proposals_single_image_test_f32,
|
||||
::testing::ValuesIn(getExperimentalDetectronGenerateProposalsSingleImageParams<float>()));
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(getExperimentalDetectronGenerateProposalsSingleImageParams<float>()),
|
||||
::testing::Values(format::bfyx)
|
||||
),
|
||||
PrintToStringParamName()
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
experimental_detectron_generate_proposals_single_image_gpu_test,
|
||||
experimental_detectron_generate_proposals_single_image_test_f16,
|
||||
::testing::ValuesIn(getExperimentalDetectronGenerateProposalsSingleImageParams<half_t>()));
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(getExperimentalDetectronGenerateProposalsSingleImageParams<half_t>()),
|
||||
::testing::Values(format::bfyx)
|
||||
),
|
||||
PrintToStringParamName()
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
experimental_detectron_generate_proposals_single_image_gpu_test_all_formats,
|
||||
experimental_detectron_generate_proposals_single_image_test_f32,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
ExperimentalDetectronGenerateProposalsSingleImageParams<float>{
|
||||
5.0f, 0.71f, 10, 15,
|
||||
{43.171, 0.31823, 53.5592, 149,
|
||||
0, 75.2272, 149, 87.2278,
|
||||
141.058, 114.876, 149, 149,
|
||||
149, 149, 149, 149,
|
||||
30.2866, 149, 149, 149,
|
||||
149, 149, 149, 149,
|
||||
149, 126.679, 149, 149,
|
||||
149, 6.53844, 149, 149,
|
||||
149, 0, 149, 149,
|
||||
149, 149, 149, 149,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0},
|
||||
{0.695249, 0.411288, 0.0941284, 0.0, 0.0, 0.0,
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}
|
||||
),
|
||||
::testing::ValuesIn(layouts)
|
||||
),
|
||||
PrintToStringParamName()
|
||||
);
|
||||
|
Loading…
Reference in New Issue
Block a user