From b86ab12f0fe97d5232504ed55a60c1eabec5a725 Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Tue, 17 Aug 2021 23:20:54 +0300 Subject: [PATCH] [CPU][Release 2021.4.1] Fix roipooling border proposal computation for 2021.4 (#7116) * roi_pooling handles border props correctly * fix adapted for old test infrastructure --- .../nodes/mkldnn_roi_pooling_node.cpp | 103 ++++++++++++------ .../cpu/single_layer_tests/roi_pooling.cpp | 60 +++++++++- .../common_test_utils/data_utils.hpp | 6 +- .../ngraph/runtime/reference/roi_pooling.hpp | 38 +++++-- ngraph/test/backend/roi_pooling.in.cpp | 51 +++++++++ 5 files changed, 210 insertions(+), 48 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 77db7621692..346bc1079a9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -513,13 +513,18 @@ void MKLDNNROIPoolingNode::execute() { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; + (*roi_pooling_kernel)(&arg); } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } - - (*roi_pooling_kernel)(&arg); } else { size_t roi_off = n * src_roi_step; const auto *src_roi_ptr = &src_roi[roi_off]; @@ -569,18 +574,23 @@ void MKLDNNROIPoolingNode::execute() { arg.kh = hend - hstart; arg.kw = wend - wstart; } else { - for (int c = 0; c < c_block; c++) { - const 
size_t pool_index = n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; - if ((hend <= hstart) || (wend <= wstart)) { - dst[pool_index] = 0; - } else { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - float batch_data = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - h * src_strides[2] + w * src_strides[3] + c]; - - if (batch_data > dst[pool_index]) { - dst[pool_index] = batch_data; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const size_t pool_index = n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; + if ((hend <= hstart) || (wend <= wstart)) { + dst[pool_index] = 0; + } else { + dst[pool_index] = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + hstart * src_strides[2] + wstart * src_strides[3] + c]; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + float batch_data = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + h * src_strides[2] + w * src_strides[3] + c]; + dst[pool_index] = std::fmax(batch_data, dst[pool_index]); } } } @@ -596,18 +606,35 @@ void MKLDNNROIPoolingNode::execute() { float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0); float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0); - float in_y = (jpp.pooled_h > 1 ? (oh * height_scale + roi_start_h_ * (jpp.ih - 1)) : - 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1)); - float in_x = (jpp.pooled_w > 1 ? 
(ow * width_scale + roi_start_w_ * (jpp.iw - 1)) : - 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1)); + float in_y, in_x; + // because of nonalgebraic character of floating point operation, some proposals can cause violation of inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1), + // and as result excess of right limit for proposal value, + // if the border case (current_h == pooled_h - 1) will not be handled explicitly + if (jpp.pooled_h > 1) { + in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1))); + } else { + in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1); + } + if (jpp.pooled_w > 1) { + in_x = (ow == jpp.pooled_w - 1 ? roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1))); + } else { + in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1); + } if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } } else { @@ -636,21 +663,27 @@ void MKLDNNROIPoolingNode::execute() { arg.bin_area = 1; } else { - for (int c = 0; c < 1; c++) { - const float top_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float top_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - 
top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float bottom_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float bottom_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const float top_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float top_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + const float bottom_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float bottom_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float top = top_left + (top_right - top_left) * (in_x - left_x_index); - const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); + const float top = top_left + (top_right - top_left) * (in_x - left_x_index); + const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = - top + (bottom - top) * (in_y - top_y_index); + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = + top + (bottom - top) * (in_y - top_y_index); + } } } } diff --git 
a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp index 7422befc949..3abab0db029 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp @@ -10,9 +10,11 @@ using namespace InferenceEngine; using namespace CPUTestUtils; namespace CPULayerTestsDefinitions { +enum ProposalGenerationMode { RANDOM, ULTIMATE_RIGHT_BORDER }; using ROIPoolingCPUTestParamsSet = std::tuple>; class ROIPoolingCPULayerTest : public testing::WithParamInterface, @@ -22,9 +24,10 @@ public: static std::string getTestCaseName(testing::TestParamInfo obj) { LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet; CPUSpecificParams cpuParams; + ProposalGenerationMode propMode; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = obj.param; std::ostringstream result; result << LayerTestsDefinitions::ROIPoolingLayerTest::getTestCaseName( @@ -38,6 +41,15 @@ public: result << "_" << item.first << "=" << item.second; } } + switch (propMode) { + case ProposalGenerationMode::ULTIMATE_RIGHT_BORDER: + result << "_UltimateRightBorderProposal"; + break; + case ProposalGenerationMode::RANDOM: + default: + result << "_RandomProposal"; + break; + } return result.str(); } @@ -55,6 +67,28 @@ protected: for (const auto &input : cnnNetwork.getInputsInfo()) { const auto &info = input.second; InferenceEngine::Blob::Ptr blob; + void (*propGenerator)(InferenceEngine::Blob::Ptr &); + switch (propMode) { + case ULTIMATE_RIGHT_BORDER: + // because of nonalgebraic character of floating point operation, the following values cause violation of the inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) > (end_h - start_h) * (input_h - 1) + // and as result excess of right 
limit for proposal value if the border case (current_h == pooled_h - 1) + // will not be handled explicitly + propGenerator = [](InferenceEngine::Blob::Ptr &blob) { + auto *data = blob->buffer().as(); + for (size_t i = 0; i < blob->size(); i += 5) { + data[i] = 0; + data[i + 1] = 0.f; + data[i + 2] = 0.248046786f; + data[i + 3] = 0.471333951f; + data[i + 4] = 1.f; + } + }; + break; + case RANDOM: + default: + propGenerator = nullptr; + } if (it == 1) { blob = make_blob_with_precision(info->getTensorDesc()); @@ -62,12 +96,12 @@ protected: switch (inPrc) { case Precision::FP32: { CommonTestUtils::fill_data_roi - (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode); + (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator); break; } case Precision::BF16: { CommonTestUtils::fill_data_roi - (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode); + (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator); break; } default: @@ -92,7 +126,7 @@ protected: InferenceEngine::SizeVector poolShape; InferenceEngine::Precision netPrecision; - std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(inputShape, coordsShape, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet; @@ -118,6 +152,7 @@ protected: private: ngraph::helpers::ROIPoolingTypes pool_method; float spatial_scale; + ProposalGenerationMode propMode; }; TEST_P(ROIPoolingCPULayerTest, CompareWithRefs) { @@ -190,6 +225,7 @@ INSTANTIATE_TEST_CASE_P(smoke_ROIPoolingCPU_max, ROIPoolingCPULayerTest, ::testing::Combine(test_ROIPooling_max, ::testing::ValuesIn(selectCPUInfoForDevice()), + ::testing::Values(ProposalGenerationMode::RANDOM), ::testing::ValuesIn(additionalConfig)), ROIPoolingCPULayerTest::getTestCaseName); @@ -197,7 +233,23 
@@ INSTANTIATE_TEST_CASE_P(smoke_ROIPoolingCPU_bilinear, ROIPoolingCPULayerTest, ::testing::Combine(test_ROIPooling_bilinear, ::testing::ValuesIn(selectCPUInfoForDevice()), + ::testing::Values(ProposalGenerationMode::RANDOM), ::testing::ValuesIn(additionalConfig)), ROIPoolingCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ROIPoolingCPU_bilinear_ultimateRightBorderProposal, + ROIPoolingCPULayerTest, + ::testing::Combine(::testing::Combine(::testing::Values(std::vector { 1, 1, 50, 50 }), + ::testing::Values(std::vector { 1, 5 }), + ::testing::Values(std::vector { 4, 4 }), + ::testing::Values(spatial_scales[1]), + ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(selectCPUInfoForDevice()), + ::testing::Values(ProposalGenerationMode::ULTIMATE_RIGHT_BORDER), + ::testing::Values(std::map{ + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}})), + ROIPoolingCPULayerTest::getTestCaseName); } // namespace } // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp index 2ba869a6aa5..05f8f92e4d3 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp @@ -94,7 +94,11 @@ size_t byte_size(const InferenceEngine::TensorDesc &tdesc); template inline void fill_data_roi(InferenceEngine::Blob::Ptr &blob, const uint32_t range, const int height, const int width, const float omega, - const bool is_roi_max_mode, const int seed = 1) { + const bool is_roi_max_mode, const int seed = 1, void (*propGenerator)(InferenceEngine::Blob::Ptr &) = nullptr) { + if (propGenerator != nullptr) { + propGenerator(blob); + return; + } using dataType = typename 
InferenceEngine::PrecisionTrait::value_type; auto *data = blob->buffer().as(); std::default_random_engine random(seed); diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/roi_pooling.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/roi_pooling.hpp index e37005c3e6a..5574061664c 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/roi_pooling.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/roi_pooling.hpp @@ -138,14 +138,36 @@ namespace ngraph { for (int pw = 0; pw < pooled_w; pw++) { - T in_y = - (pooled_h > 1) - ? (ph * roi_height_scale + roi_h_start * (height - 1)) - : 0.5 * (roi_h_start + roi_h_end) * (height - 1); - T in_x = - (pooled_w > 1) - ? (pw * roi_width_scale + roi_w_start * (width - 1)) - : 0.5 * (roi_w_end + roi_w_start) * (width - 1); + // because of nonalgebraic character of floating point + // operation, some proposals can cause violation of inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * + // (pooled_h - 1) + // <= (end_h - start_h) * (input_h - 1), + // and as result excess of right limit for proposal value + // if the border case (current_h == pooled_h - 1) + // will not be handled explicitly + T in_y, in_x; + if (pooled_h > 1) + { + in_y = + ((ph == pooled_h - 1) ? (height - 1) * roi_h_end + : (ph * roi_height_scale + + roi_h_start * (height - 1))); + } + else + { + in_y = 0.5 * (roi_h_start + roi_h_end) * (height - 1); + } + if (pooled_w > 1) + { + in_x = ((pw == pooled_w - 1) ? 
(width - 1) * roi_w_end + : (pw * roi_width_scale + + roi_w_start * (width - 1))); + } + else + { + in_x = 0.5 * (roi_w_end + roi_w_start) * (width - 1); + } const size_t pool_index = roi_num * channels * pooled_h * pooled_w + diff --git a/ngraph/test/backend/roi_pooling.in.cpp b/ngraph/test/backend/roi_pooling.in.cpp index b37cf3c37b6..9c080a79df7 100644 --- a/ngraph/test/backend/roi_pooling.in.cpp +++ b/ngraph/test/backend/roi_pooling.in.cpp @@ -201,3 +201,54 @@ NGRAPH_TEST(${BACKEND_NAME}, roi_pooling_2x2_bilinear) test_case.add_expected_output(output_shape, expected_vect); test_case.run(); } + +NGRAPH_TEST(${BACKEND_NAME}, roi_pooling_2x2_bilinear_border_proposal) +{ + const int H = 50; + const int W = 50; + const int image_size = H * W; + const int channels = 1; + const int num_rois = 1; + + const int pooled_h = 4; + const int pooled_w = 4; + const float spatial_scale = 1.f; + + Shape feat_maps_shape{1, channels, H, W}; + Shape rois_shape{num_rois, 5}; + Shape pooled_shape{pooled_h, pooled_w}; + Shape output_shape{num_rois, channels, pooled_h, pooled_w}; + + const auto feat_maps = make_shared(element::f32, feat_maps_shape); + const auto rois = make_shared(element::f32, rois_shape); + const auto roi_pooling = + make_shared(feat_maps, rois, pooled_shape, spatial_scale, "bilinear"); + const auto f = make_shared(roi_pooling, ParameterVector{feat_maps, rois}); + + vector feat_maps_vect; + for (unsigned int i = 0; i < channels * image_size; i++) + { + feat_maps_vect.push_back(1.f); + } + + // because of nonalgebraic character of floating point operation, some proposals can cause violation of inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1), + // and as result excess of right limit for proposal value, + // if the border case (current_h == pooled_h - 1) will not be handled explicitly + vector rois_vect = {0.f, + 0.f, + 0.248046786f, + 0.471333951f, + 1.f}; + + vector expected_vect; + for 
(unsigned int i = 0; i < channels * pooled_h * pooled_w; i++) + { + expected_vect.push_back(1.f); + } + auto test_case = test::TestCase(f); + test_case.add_input(feat_maps_shape, feat_maps_vect); + test_case.add_input(rois_shape, rois_vect); + test_case.add_expected_output(output_shape, expected_vect); + test_case.run(); +}