[GPU] Add support class agnostic box regression for experimental detectron … (#13003)

* Add support for class-agnostic box regression in experimental detectron detection output
* Add test
2022-10-11 15:31:47 +03:00 · 2022-10-11 15:31:47 +03:00 · 2d4d80a444
commit 2d4d80a444
parent 66b5f9acf2
5 changed files with 173 additions and 25 deletions
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp
@ -79,6 +79,10 @@ JitConstants ExperimentalDetectronDetectionOutputKernelRef::GetJitConstants(
        MakeJitConstant("OUTPUT_INDICES_TYPE", "INPUT4_TYPE"),
    });

+    if (params.class_agnostic_box_regression) {
+        jit.AddConstant(MakeJitConstant("CLASS_AGNOSTIC_BOX_REGRESSION", true));
+    }
+
    return jit;
 }

@ -111,7 +115,7 @@ void ExperimentalDetectronDetectionOutputKernelRef::PrepareRefineBoxesKernel(
    const optional_params& options,
    clKernelData& kernel) const {
    const size_t roi_count = params.inputs[kScoresInputIdx].Batch().v;
-    const size_t class_count = params.num_classes;
+    const size_t class_count = params.class_agnostic_box_regression ? params.num_classes - 1 : params.num_classes;

    PrepareKernelCommon(params, options, {roi_count, class_count, 1}, "EDDO_STAGE_0_REFINE_BOXES", 0, kernel);

--- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl
@ -131,7 +131,12 @@ KERNEL(eddo_ref_stage_0)
 __global INPUT_TYPE* refined_scores) {
    const size_t roi_count = get_global_size(0);
    size_t roi_idx = get_global_id(0);
+
+#ifdef CLASS_AGNOSTIC_BOX_REGRESSION
+    size_t class_idx = get_global_id(1) + 1;
+#else
    size_t class_idx = get_global_id(1);
+#endif

    INPUT_TYPE4 box = vload4(roi_idx, boxes);

@ -160,7 +165,7 @@ KERNEL(eddo_ref_stage_0)

        // clamp new corner locations so they are non-negative (no upper bound against the image size is applied here)
        const INPUT_TYPE2 img_size = vload2(0, im_info).s10;
-        new_box = clamp(new_box, ZERO4, img_size.xyxy);
+        new_box = fmax(new_box, ZERO4);

        // recompute new width & height
        const INPUT_TYPE2 new_box_size = new_box.hi - new_box.lo + COORDINATE_OFFSET;
--- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp
@ -175,6 +175,72 @@ TEST_P(experimental_detectron_detection_output_test_f16, basic) {
 template <typename T>
 std::vector<ExperimentalDetectronDetectionOutputParams<T>> getExperimentalDetectronDetectionOutputParams() {
    std::vector<ExperimentalDetectronDetectionOutputParams<T>> params = {
+        {
+            0.01000000074505806f,       // score_threshold
+            0.2f,                       // nms_threshold
+            2.0f,                       // max_delta_log_wh
+            2,                          // num_classes
+            500,                        // post_nms_count
+            5,                          // max_detections_per_image
+            true,                       // class_agnostic_box_regression
+            {10.0f, 10.0f, 5.0f, 5.0f}, // deltas_weights
+            16,                         // roi count
+
+            // boxes
+            getValues<T>({1.0f, 1.0f, 10.0f, 10.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 8.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),
+
+            // deltas
+            getValues<T>(
+                {5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+
+                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),
+
+            // scores
+            getValues<T>({1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),
+
+            // im_info
+            getValues<T>({1.0f, 1.0f, 1.0f}),
+
+            // out_boxes
+            getValues<T>({0.8929862f,
+                          0.892986297607421875,
+                          12.10701370239257812,
+                          12.10701370239257812,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0f,
+                          0.0}),
+
+            // out_classes
+            std::vector<int32_t>{1, 0, 0, 0, 0},
+
+            // out_scores
+            getValues<T>({1.0f, 0.0f, 0.0f, 0.0f, 0.0f})
+        },
        {
            0.01000000074505806f,        // score_threshold
            0.2f,                        // nms_threshold
@ -193,6 +259,7 @@ std::vector<ExperimentalDetectronDetectionOutputParams<T>> getExperimentalDetect
                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),

+            // deltas
            getValues<T>({1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f,
                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f,
                          1.0f, 1.0f, 5.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
@ -203,16 +270,24 @@ std::vector<ExperimentalDetectronDetectionOutputParams<T>> getExperimentalDetect
                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                          1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,  1.0f}),

+            // scores
            getValues<T>({0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.8f, 0.9f, 0.5f,
                          0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
                          0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}),

+            // im_info
            getValues<T>({16.0f, 12.0f, 1.0f}),
-            getValues<T>({4.8929863f,  0.892986298f, 12.0f, 12.1070137f, 0.0f, 0.892986298f, 10.1070137f,
-                          12.1070137f, 0.0f,         0.0f,  0.0f,        0.0f, 0.0f,         0.0f,
-                          0.0f,        0.0f,         0.0f,  0.0f,        0.0f, 0.0f}),
-            std::vector<int32_t>{0, 1, 0, 0, 0},
-            getValues<T>({0.8f, 0.9f, 0.0f, 0.0f, 0.0f}),
+
+            // out_boxes
+            getValues<T>({ 0.0f, 0.892987f, 10.107f, 12.107f, 0.0f, 0.0f, 0.0f, 0.0f,
+                           0.0f, 0.0f,       0.0f,    0.0f,   0.0f, 0.0f, 0.0f, 0.0f,
+                           0.0f, 0.0f,       0.0f,    0.0f}),
+
+            // out_classes
+            std::vector<int32_t>{1, 0, 0, 0, 0},
+
+            // out_scores
+            getValues<T>({0.9f, 0.0f, 0.0f, 0.0f, 0.0f}),
        },
        {
            0.0500000007,                // score_threshold
@ -274,12 +349,12 @@ std::vector<ExperimentalDetectronDetectionOutputParams<T>> getExperimentalDetect
            }),

            // out_boxes
-            getValues<T>({
-                0,       2.97829, 6.57812, 4.90234, 0,       4.90234, 6.57812, 4.90234, 4.37184, 4.90234,
-                6.03075, 4.90234, 5.95093, 3.66966, 6.57812, 4.90234, 0,       4.90234, 6.57812, 4.90234,
-                1.31075, 4.90234, 6.57812, 4.90234, 3.24829, 4.90234, 6.57812, 4.90234, 0,       0,
-                6.57812, 4.90234, 4.20346, 0,       6.57812, 4.90234, 0,       0,       6.57812, 4.90234,
-            }),
+            getValues<T>({ 0.0f,     2.97829f, 14.8295f,  11.1221f, 0.0f,     6.29737f, 16.2088f,  16.3451f,
+                           4.37184f, 6.41816f,  6.03075f, 15.934f,  5.95092f, 3.66966f,  6.81878f, 16.9983f,
+                           0.0f,     5.64766f, 17.3085f,  12.3716f, 1.31074f, 9.12453f, 13.1104f,  10.6441f,
+                           3.24828f, 7.11447f,  9.16656f, 10.1058f, 0.0f,     0.0f,     10.0008f,  14.6173f,
+                           4.20346f, 0.0f,      8.5746f,  18.8736f, 0.0f,     0.0f,     15.661f,   22.4114f}
+            ),

            // out_classes
            std::vector<int32_t>({
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp
@ -32,12 +32,16 @@ const std::vector<int64_t> num_classes = {2};
 const std::vector<int64_t> post_nms_count = {5, 25};

 // specifies maximal number of detections per image
+// The code at docs/template_plugin/backend/evaluates_map.cpp:2117 assigns
+// `size_t rois_num = attrs.max_detections_per_image`; as a result, max_detections_per_image
+// has to be set equal to rois_num.
+const std::vector<size_t> max_detections_per_image16 = {16};
 const std::vector<size_t> max_detections_per_image = {5, 25};

 // a flag specifies whether to delete background classes or not
 // `true`  means background classes should be deleted,
 // `false` means background classes shouldn't be deleted.
-const std::vector<bool> class_agnostic_box_regression = {true, false};
+const bool class_agnostic_box_regression_true = true;
+const bool class_agnostic_box_regression_false = false;

 // specifies deltas of weights
 const std::vector<std::vector<float>> deltas_weights = {{10.0f, 10.0f, 5.0f, 5.0f}};
@ -47,6 +51,37 @@ const std::vector<std::vector<InputShape>> inputShapes = {
    static_shapes_to_test_representation({{16, 4}, {16, 8}, {16, 2}, {1, 3}}),
 };

+
+INSTANTIATE_TEST_SUITE_P(smoke_ExperimentalDetectronDetectionOutput,
+                         ExperimentalDetectronDetectionOutputLayerTest,
+                         ::testing::Combine(::testing::ValuesIn(inputShapes),
+                                            ::testing::ValuesIn(score_threshold),
+                                            ::testing::ValuesIn(nms_threshold),
+                                            ::testing::ValuesIn(max_delta_log_wh),
+                                            ::testing::ValuesIn(num_classes),
+                                            ::testing::ValuesIn(post_nms_count),
+                                            ::testing::ValuesIn(max_detections_per_image16),
+                                            ::testing::Values(class_agnostic_box_regression_true),
+                                            ::testing::ValuesIn(deltas_weights),
+                                            ::testing::ValuesIn(netPrecisions),
+                                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                         ExperimentalDetectronDetectionOutputLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutputMaxDetectionsPerImage,
+                         ExperimentalDetectronDetectionOutputLayerTest,
+                         ::testing::Combine(::testing::ValuesIn(inputShapes),
+                                            ::testing::ValuesIn(score_threshold),
+                                            ::testing::ValuesIn(nms_threshold),
+                                            ::testing::ValuesIn(max_delta_log_wh),
+                                            ::testing::ValuesIn(num_classes),
+                                            ::testing::ValuesIn(post_nms_count),
+                                            ::testing::ValuesIn(max_detections_per_image),
+                                            ::testing::Values(class_agnostic_box_regression_true),
+                                            ::testing::ValuesIn(deltas_weights),
+                                            ::testing::ValuesIn(netPrecisions),
+                                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                         ExperimentalDetectronDetectionOutputLayerTest::getTestCaseName);
+
 INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutput,
                         ExperimentalDetectronDetectionOutputLayerTest,
                         ::testing::Combine(::testing::ValuesIn(inputShapes),
@ -56,7 +91,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutput,
                                            ::testing::ValuesIn(num_classes),
                                            ::testing::ValuesIn(post_nms_count),
                                            ::testing::ValuesIn(max_detections_per_image),
-                                            ::testing::ValuesIn(class_agnostic_box_regression),
+                                            ::testing::Values(class_agnostic_box_regression_false),
                                            ::testing::ValuesIn(deltas_weights),
                                            ::testing::ValuesIn(netPrecisions),
                                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
--- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp
+++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp
@ -99,18 +99,34 @@ void ExperimentalDetectronDetectionOutputLayerTest::SetUp() {
        "ExperimentalDetectronDetectionOutput");
 }

-void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
-    static const std::vector<ov::Tensor> inputTensors = {
+namespace {
+
+template <typename T>
+std::vector<T> getValues(const std::vector<float>& values) {
+    std::vector<T> result(values.begin(), values.end());
+    return result;
+}
+
+template <typename T>
+std::vector<ov::Tensor> generateInputTensors() {
+    const auto netPrecision = ov::element::from<T>();
+    std::vector<ov::Tensor> inputTensors = {
        // 16 x 4 = 64
-        ov::test::utils::create_tensor<float>(ov::element::f32, Shape{16, 4}, {
+        ov::test::utils::create_tensor<T>(
+            netPrecision,
+            Shape{16, 4},
+            getValues<T>({
            1.0f, 1.0f, 10.0f, 10.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f,  4.0f,  1.0f, 8.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f,  1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f,  1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f,  1.0f,  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
-        }),
+        })),
        // 16 x 8
-        ov::test::utils::create_tensor<float>(ov::element::f32, Shape{16, 8}, {
+        ov::test::utils::create_tensor<T>(
+            netPrecision,
+            Shape{16, 8},
+            getValues<T>({
            5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
@ -120,16 +136,29 @@ void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs(const std::v
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
-        }),
+        })),
        // 16 x 2 = 32
-        ov::test::utils::create_tensor<float>(ov::element::f32, Shape{16, 2}, {
+        ov::test::utils::create_tensor<T>(
+            netPrecision,
+            Shape{16, 2},
+            getValues<T>({
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
            1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
-        }),
+        })),
        // 1 x 3 = 3
-        ov::test::utils::create_tensor<float>(ov::element::f32, Shape{1, 3}, {1.0f, 1.0f, 1.0f})
-    };
+        ov::test::utils::create_tensor<T>(netPrecision, Shape{1, 3}, getValues<T>({1.0f, 1.0f, 1.0f}))};
+
+    return inputTensors;
+}
+}  // namespace
+
+void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs(
+    const std::vector<ngraph::Shape>& targetInputStaticShapes) {
+    const auto netPrecision = std::get<9>(GetParam());
+
+    const std::vector<ov::Tensor> inputTensors =
+        (netPrecision == element::f16) ? generateInputTensors<ov::float16>() : generateInputTensors<float>();

    inputs.clear();
    const auto& funcInputs = function->inputs();