[IE CLDNN] Mixed mode support for proposal primitive (#1857)

Vladimir Paramuzov 2020-08-27 11:43:24 +03:00 committed by GitHub
parent 45070963a5
commit ae8be58701
3 changed files with 166 additions and 53 deletions


@@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/proposal.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace ngraph::helpers;
using namespace LayerTestsDefinitions;
namespace {
/* ============= Proposal ============= */
const std::vector<base_size_type> base_size_ = {16};
const std::vector<pre_nms_topn_type> pre_nms_topn_ = {100};
const std::vector<post_nms_topn_type> post_nms_topn_ = {100};
const std::vector<nms_thresh_type> nms_thresh_ = {0.7f};
const std::vector<min_size_type> min_size_ = {1};
const std::vector<ratio_type> ratio_ = {{1.0f, 2.0f}};
const std::vector<scale_type> scale_ = {{1.2f, 1.5f}};
const std::vector<clip_before_nms_type> clip_before_nms_ = {false};
const std::vector<clip_after_nms_type> clip_after_nms_ = {false};
// empty string corresponds to Caffe framework
const std::vector<framework_type> framework_ = {""};
const auto proposalParams = ::testing::Combine(
::testing::ValuesIn(base_size_),
::testing::ValuesIn(pre_nms_topn_),
::testing::ValuesIn(post_nms_topn_),
::testing::ValuesIn(nms_thresh_),
::testing::ValuesIn(min_size_),
::testing::ValuesIn(ratio_),
::testing::ValuesIn(scale_),
::testing::ValuesIn(clip_before_nms_),
::testing::ValuesIn(clip_after_nms_),
::testing::ValuesIn(framework_)
);
INSTANTIATE_TEST_CASE_P(Proposal_tests, ProposalLayerTest,
::testing::Combine(
proposalParams,
::testing::Values(CommonTestUtils::DEVICE_GPU)),
ProposalLayerTest::getTestCaseName
);
} // namespace
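
For orientation, the instantiation above relies on ::testing::Combine producing the full cross-product of the ValuesIn lists, so every combination of base size, NMS threshold, clip flags, etc. becomes its own test instance. A minimal, self-contained sketch of the same pattern (the ToyProposalTest fixture and its three parameters are hypothetical, not part of this suite):

#include <tuple>
#include <vector>
#include <gtest/gtest.h>

// Hypothetical parameter tuple: (base_size, nms_thresh, clip_before_nms).
using ToyProposalParams = std::tuple<int, float, bool>;

class ToyProposalTest : public ::testing::TestWithParam<ToyProposalParams> {};

TEST_P(ToyProposalTest, ParamsAreForwarded) {
    int base_size;
    float nms_thresh;
    bool clip_before_nms;
    std::tie(base_size, nms_thresh, clip_before_nms) = GetParam();
    EXPECT_GT(base_size, 0);
    EXPECT_GT(nms_thresh, 0.0f);
    (void)clip_before_nms;
}

// 2 base sizes x 1 threshold x 2 clip flags = 4 generated test instances.
INSTANTIATE_TEST_CASE_P(ToyProposal_tests, ToyProposalTest,
    ::testing::Combine(
        ::testing::ValuesIn(std::vector<int>{16, 32}),
        ::testing::ValuesIn(std::vector<float>{0.7f}),
        ::testing::Bool()));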


@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -196,44 +196,39 @@ std::vector<roi_t> perform_nms(const std::vector<proposal_t>& proposals,
* *
****************************************************************************/
struct im_info_t {
int img_w;
int img_h;
int img_z;
int min_bbox_x;
int min_bbox_y;
};
struct proposal_gpu : typed_primitive_impl<proposal> {
const proposal_node& outer;
explicit proposal_gpu(const proposal_node& arg) : outer(arg) {}
template <typename dtype>
void execute(proposal_inst& instance, dtype* proposal_prob_ptr = nullptr) {
const std::vector<proposal_inst::anchor>& anchors = instance.get_anchors();
size_t anchors_num = anchors.size();
auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index);
auto& bbox_pred = instance.dep_memory(proposal_inst::bbox_pred_index);
void read_image_info(proposal_inst& instance, im_info_t& im_info) {
auto& image_info = instance.dep_memory(proposal_inst::image_info_index);
// original input image to the graph (after possible scaling etc.) so that coordinates are valid for it
mem_lock<dtype> image_info_ptr{image_info};
const dtype* image_info_mem = image_info_ptr.data();
bool swap_xy = instance.argument.swap_xy;
// original input image to the graph (after possible scaling etc.) so that coordinates are valid for it
int img_w = 1;
int img_h = 1;
int img_z = 1;
int min_bbox_x = 1;
int min_bbox_y = 1;
int scaled_min_bbox_size = instance.argument.min_bbox_size;
bool swap_xy = instance.argument.swap_xy;
bool initial_clip = instance.argument.initial_clip;
bool clip_before_nms = instance.argument.clip_before_nms;
bool clip_after_nms = instance.argument.clip_after_nms;
float coordinates_offset = instance.argument.coordinates_offset;
float box_coordinate_scale = instance.argument.box_coordinate_scale;
float box_size_scale = instance.argument.box_size_scale;
bool for_deformable = instance.argument.for_deformable;
auto image_info_size = image_info.get_layout().size;
auto image_info_count = image_info_size.feature[0] == 1 ? image_info_size.batch[0] : image_info_size.feature[0];
int scaled_min_bbox_size = instance.argument.min_bbox_size;
if (image_info_count == 4) {
img_w =
static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_width_index) + EPSILON);
@@ -268,6 +263,31 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
std::swap(img_w, img_h);
}
im_info.img_h = img_h;
im_info.img_w = img_w;
im_info.img_z = img_z;
im_info.min_bbox_x = min_bbox_x;
im_info.min_bbox_y = min_bbox_y;
}
template <typename dtype>
void execute(proposal_inst& instance, im_info_t im_info, dtype* proposal_prob_ptr = nullptr) {
const std::vector<proposal_inst::anchor>& anchors = instance.get_anchors();
size_t anchors_num = anchors.size();
auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index);
auto& bbox_pred = instance.dep_memory(proposal_inst::bbox_pred_index);
bool swap_xy = instance.argument.swap_xy;
bool initial_clip = instance.argument.initial_clip;
bool clip_before_nms = instance.argument.clip_before_nms;
bool clip_after_nms = instance.argument.clip_after_nms;
float coordinates_offset = instance.argument.coordinates_offset;
float box_coordinate_scale = instance.argument.box_coordinate_scale;
float box_size_scale = instance.argument.box_size_scale;
bool for_deformable = instance.argument.for_deformable;
// feat map sizes
const auto& score_size = cls_scores.get_layout().size;
int fm_h = score_size.spatial[1];
@@ -311,8 +331,8 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
bbox_delta,
anchor_shift_x,
anchor_shift_y,
-img_w,
-img_h,
+im_info.img_w,
+im_info.img_h,
coordinates_offset,
initial_clip,
clip_before_nms,
@@ -323,7 +343,7 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
size_t scores_index =
n * num_proposals * 2 + location_index + fm_sz * (anchor_index + anchors_num);
-float proposal_confidence = (min_bbox_x <= bbox_w) * (min_bbox_y <= bbox_h) *
+float proposal_confidence = (im_info.min_bbox_x <= bbox_w) * (im_info.min_bbox_y <= bbox_h) *
float_read_helper(cls_scores_mem + scores_index);
sorted_proposals_confidence.emplace_back(roi,
proposal_confidence,
@@ -350,17 +370,17 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
for (size_t i = 0; i < res_num_rois; ++i) {
if (clip_after_nms) {
-res[i].x0 = clamp(res[i].x0, 0.0f, static_cast<float>(img_w));
-res[i].y0 = clamp(res[i].y0, 0.0f, static_cast<float>(img_h));
-res[i].x1 = clamp(res[i].x1, 0.0f, static_cast<float>(img_w));
-res[i].y1 = clamp(res[i].y1, 0.0f, static_cast<float>(img_h));
+res[i].x0 = clamp(res[i].x0, 0.0f, static_cast<float>(im_info.img_w));
+res[i].y0 = clamp(res[i].y0, 0.0f, static_cast<float>(im_info.img_h));
+res[i].x1 = clamp(res[i].x1, 0.0f, static_cast<float>(im_info.img_w));
+res[i].y1 = clamp(res[i].y1, 0.0f, static_cast<float>(im_info.img_h));
}
float_write_helper(top_data + 5 * i + 0, static_cast<float>(n));
-float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? img_w : 1.0f));
-float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? img_h : 1.0f));
-float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? img_w : 1.0f));
-float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? img_h : 1.0f));
+float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? im_info.img_w : 1.0f));
+float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? im_info.img_h : 1.0f));
+float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? im_info.img_w : 1.0f));
+float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? im_info.img_h : 1.0f));
if (top_data_prob != nullptr && i < sorted_proposals_confidence.size()) {
float_write_helper(top_data_prob + i, sorted_proposals_confidence[i].confidence);
}
@@ -384,21 +404,31 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
}
auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false);
im_info_t im_info;
if (instance.dep_memory(proposal_inst::image_info_index).get_layout().data_type == data_types::f16) {
read_image_info<data_type_to_type<data_types::f16>::type>(instance, im_info);
} else {
read_image_info<data_type_to_type<data_types::f32>::type>(instance, im_info);
}
if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type !=
instance.dep_memory(proposal_inst::bbox_pred_index).get_layout().data_type)
throw std::runtime_error("clDNN: proposal primitive doesn't support mixed bbox and scores types");
if (instance.dependencies().size() == 4) {
auto &proposal_probabilities = instance.dep_memory(proposal_inst::proposal_probabilities_out);
if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) {
mem_lock<data_type_to_type<data_types::f16>::type> proposal_prob_ptr{proposal_probabilities};
-execute<data_type_to_type<data_types::f16>::type>(instance, proposal_prob_ptr.data());
+execute<data_type_to_type<data_types::f16>::type>(instance, im_info, proposal_prob_ptr.data());
} else {
mem_lock<data_type_to_type<data_types::f32>::type> proposal_prob_ptr{proposal_probabilities};
-execute<data_type_to_type<data_types::f32>::type>(instance, proposal_prob_ptr.data());
+execute<data_type_to_type<data_types::f32>::type>(instance, im_info, proposal_prob_ptr.data());
}
} else {
if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) {
-execute<data_type_to_type<data_types::f16>::type>(instance);
+execute<data_type_to_type<data_types::f16>::type>(instance, im_info);
} else {
-execute<data_type_to_type<data_types::f32>::type>(instance);
+execute<data_type_to_type<data_types::f32>::type>(instance, im_info);
}
}
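
The dispatch above is the core of the mixed-mode support: image_info is read once into plain integers via read_image_info<>, keyed on its own data type, and only then is the templated execute<>() chosen from the scores/bbox type, so fp16 scores can be paired with fp32 image_info and vice versa (mixed scores/bbox types are still rejected). A minimal, self-contained sketch of that two-stage idea, using float/double as stand-ins for fp32/fp16 and simplified helpers that are not the clDNN API:

#include <iostream>
#include <vector>

// Plain-integer image description: once filled, the rest of the primitive
// no longer cares which floating-point type image_info arrived in.
struct im_info_t {
    int img_w;
    int img_h;
};

// Stage 1: templated only on the image_info element type.
template <typename im_dtype>
im_info_t read_image_info(const std::vector<im_dtype>& image_info) {
    return { static_cast<int>(image_info[1]),    // width
             static_cast<int>(image_info[0]) };  // height
}

// Stage 2: templated only on the scores/bbox element type.
template <typename dtype>
void run_proposal(const std::vector<dtype>& scores, const im_info_t& info) {
    std::cout << scores.size() << " scores, image "
              << info.img_w << "x" << info.img_h << "\n";
}

int main() {
    // "fp32" scores paired with "fp16" image info, emulated here with
    // float/double so the sketch compiles without half-precision types.
    std::vector<float>  scores  = {0.9f, 0.1f, 0.4f};
    std::vector<double> im_info = {480.0, 640.0, 1.0};

    // Because the two dispatches are independent, the 2 x 2 type combinations
    // need only 2 + 2 runtime branches instead of 4 full specializations.
    im_info_t info = read_image_info(im_info);
    run_proposal(scores, info);
    return 0;
}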


@@ -1,5 +1,5 @@
/*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -71,13 +71,13 @@ const int image_z = 1;
const std::vector<float> ratios = { 0.5f, 1.0f, 2.0f };
const std::vector<float> scales = { 8.0f, 16.0f, 32.0f };
-template <typename Dtype>
+template <typename Dtype, typename ImInfoType = Dtype>
class TestRunnerProposal
{
public:
explicit TestRunnerProposal(cldnn::tensor image_info_size);
memory Run(std::vector<Dtype>& data,
std::vector<Dtype>& rois);
private:
@@ -90,13 +90,13 @@ class TestRunnerProposal
std::unique_ptr<network> _network;
};
-template <typename Dtype>
-TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
+template <typename Dtype, typename ImInfoType>
+TestRunnerProposal<Dtype, ImInfoType>::TestRunnerProposal(cldnn::tensor image_info_size) :
_cls_scores_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 18, 23, 14 } ),
_bbox_pred_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 36, 23, 14 } ),
-_image_info_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, image_info_size),
+_image_info_layout(cldnn::type_to_data_type<ImInfoType>::value, format::bfyx, image_info_size),
_test_layer(layer_name,
cls_scores_name,
bbox_pred_name,
image_info_name,
max_proposals,
@@ -108,7 +108,7 @@ TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
ratios,
scales,
padding())
{
_topology.add(input_layout(cls_scores_name, _cls_scores_layout));
_topology.add(input_layout(bbox_pred_name, _bbox_pred_layout));
_topology.add(input_layout(image_info_name, _image_info_layout));
@@ -118,26 +118,26 @@ TestRunnerProposal<Dtype>::TestRunnerProposal(cldnn::tensor image_info_size) :
_network.reset(new network(_engine, _topology));
}
-template <typename Dtype>
-memory TestRunnerProposal<Dtype>::Run(std::vector<Dtype>& cls_scores_vals,
+template <typename Dtype, typename ImInfoType>
+memory TestRunnerProposal<Dtype, ImInfoType>::Run(std::vector<Dtype>& cls_scores_vals,
std::vector<Dtype>& bbox_pred_vals)
{
memory cls_scores = memory::attach(_cls_scores_layout, cls_scores_vals.data(), cls_scores_vals.size());
memory bbox_pred = memory::attach(_bbox_pred_layout, bbox_pred_vals.data(), bbox_pred_vals.size());
-std::vector<Dtype> image_info_vals = { (Dtype)((float)image_h - 0.0000001f), // check fp robustness of the layer
-(Dtype)((float)image_w + 0.0000001f), // check fp robustness of the layer
-(Dtype)((float)image_z) };
+std::vector<ImInfoType> image_info_vals = { (ImInfoType)((float)image_h - 0.0000001f), // check fp robustness of the layer
+(ImInfoType)((float)image_w + 0.0000001f), // check fp robustness of the layer
+(ImInfoType)((float)image_z) };
memory image_info = memory::allocate(_engine, _image_info_layout);
tests::set_values(image_info, image_info_vals);
_network->set_input_data(cls_scores_name, cls_scores);
_network->set_input_data(bbox_pred_name, bbox_pred);
_network->set_input_data(image_info_name, image_info);
std::map<primitive_id, network_output> network_output = _network->execute();
EXPECT_EQ(network_output.begin()->first, layer_name);
return network_output.at(layer_name).get_memory();
}
TEST(proposal, basic) {
@@ -159,7 +159,7 @@ TEST(proposal, basic) {
TEST(proposal, fp16) {
std::vector<FLOAT16> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
std::vector<FLOAT16> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
TestRunnerProposal<FLOAT16> t({ 1, 3, 1, 1 });
const memory& output = t.Run(cls_scores, bbox_pred);
@@ -173,6 +173,40 @@ TEST(proposal, fp16) {
}
}
TEST(proposal, scores_fp16_im_info_fp32) {
std::vector<FLOAT16> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
std::vector<FLOAT16> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
TestRunnerProposal<FLOAT16, float> t({ 1, 3, 1, 1 });
const memory& output = t.Run(cls_scores, bbox_pred);
ASSERT_EQ(output.get_layout().count(), proposal_ref_size);
auto d = output.pointer<FLOAT16>();
for (size_t i = 0; i < proposal_ref_size; i++) {
FLOAT16 ref(proposal_ref[i]);
EXPECT_NEAR((float)d[i], (float)ref, epsilon_fp16);
}
}
TEST(proposal, scores_fp32_im_info_fp16) {
std::vector<float> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
std::vector<float> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);
TestRunnerProposal<float, FLOAT16> t({ 1, 3, 1, 1 });
const memory& output = t.Run(cls_scores, bbox_pred);
ASSERT_EQ(output.get_layout().count(), proposal_ref_size);
auto d = output.pointer<float>();
for (size_t i = 0; i < proposal_ref_size; i++) {
float ref(proposal_ref[i]);
EXPECT_NEAR((float)d[i], (float)ref, epsilon);
}
}
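
The two tests above cover the mixed scores/image_info combinations; because the runner's second template parameter defaults to the first (ImInfoType = Dtype), the pre-existing single-precision tests compile unchanged. A small, self-contained sketch of that default-argument pattern (ToyRunner is illustrative, not the test class above):

#include <type_traits>

// The second type defaults to the first, so existing single-precision uses
// stay source-compatible while mixed-precision uses spell out both types.
template <typename Dtype, typename ImInfoType = Dtype>
struct ToyRunner {
    using scores_type  = Dtype;
    using im_info_type = ImInfoType;
};

static_assert(std::is_same<ToyRunner<float>::im_info_type, float>::value,
              "default ImInfoType follows Dtype");
static_assert(std::is_same<ToyRunner<float, double>::im_info_type, double>::value,
              "explicit ImInfoType overrides the default");

int main() { return 0; }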
TEST(proposal, img_info_batched) {
std::vector<float> cls_scores(&cls_scores_data[0], &cls_scores_data[cls_scores_data_size]);
std::vector<float> bbox_pred(&bbox_pred_data[0], &bbox_pred_data[bbox_pred_data_size]);