diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp index 9acd536e202..434ff1fedd1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/eddo/experimental_detectron_detection_output_kernel_ref.cpp @@ -79,6 +79,10 @@ JitConstants ExperimentalDetectronDetectionOutputKernelRef::GetJitConstants( MakeJitConstant("OUTPUT_INDICES_TYPE", "INPUT4_TYPE"), }); + if (params.class_agnostic_box_regression) { + jit.AddConstant(MakeJitConstant("CLASS_AGNOSTIC_BOX_REGRESSION", true)); + } + return jit; } @@ -111,7 +115,7 @@ void ExperimentalDetectronDetectionOutputKernelRef::PrepareRefineBoxesKernel( const optional_params& options, clKernelData& kernel) const { const size_t roi_count = params.inputs[kScoresInputIdx].Batch().v; - const size_t class_count = params.num_classes; + const size_t class_count = params.class_agnostic_box_regression ? 
params.num_classes - 1 : params.num_classes; PrepareKernelCommon(params, options, {roi_count, class_count, 1}, "EDDO_STAGE_0_REFINE_BOXES", 0, kernel); diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl index a16a4c3d68f..00eb0e71a5c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_detection_output_ref.cl @@ -131,7 +131,12 @@ KERNEL(eddo_ref_stage_0) __global INPUT_TYPE* refined_scores) { const size_t roi_count = get_global_size(0); size_t roi_idx = get_global_id(0); + +#ifdef CLASS_AGNOSTIC_BOX_REGRESSION + size_t class_idx = get_global_id(1) + 1; +#else size_t class_idx = get_global_id(1); +#endif INPUT_TYPE4 box = vload4(roi_idx, boxes); @@ -160,7 +165,7 @@ KERNEL(eddo_ref_stage_0) // adjust new corner locations to be within the image region const INPUT_TYPE2 img_size = vload2(0, im_info).s10; - new_box = clamp(new_box, ZERO4, img_size.xyxy); + new_box = fmax(new_box, ZERO4); // recompute new width & height const INPUT_TYPE2 new_box_size = new_box.hi - new_box.lo + COORDINATE_OFFSET; diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp index ff5a1cfc2a1..8f4fde1f11e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp @@ -175,6 +175,72 @@ TEST_P(experimental_detectron_detection_output_test_f16, basic) { template std::vector> getExperimentalDetectronDetectionOutputParams() { std::vector> params = { + { + 0.01000000074505806f, // score_threshold + 
0.2f, // nms_threshold + 2.0f, // max_delta_log_wh + 2, // num_classes + 500, // post_nms_count + 5, // max_detections_per_image + true, // class_agnostic_box_regression + {10.0f, 10.0f, 5.0f, 5.0f}, // deltas_weights + 16, // roi count + + // boxes + getValues({1.0f, 1.0f, 10.0f, 10.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 8.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), + + // deltas + getValues( + {5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), + + // scores + getValues({1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), + + // im_info + getValues({1.0f, 1.0f, 1.0f}), + + // out_boxes + getValues({0.8929862f, + 0.892986297607421875, + 12.10701370239257812, + 12.10701370239257812, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0}), + + // 
out_classes + std::vector{1, 0, 0, 0, 0}, + + // out_scores + getValues({1.0f, 0.0f, 0.0f, 0.0f, 0.0f}) + }, { 0.01000000074505806f, // score_threshold 0.2f, // nms_threshold @@ -193,6 +259,7 @@ std::vector> getExperimentalDetect 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), + // deltas getValues({1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 5.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, @@ -203,16 +270,24 @@ std::vector> getExperimentalDetect 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), + // scores getValues({0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.8f, 0.9f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}), + // im_info getValues({16.0f, 12.0f, 1.0f}), - getValues({4.8929863f, 0.892986298f, 12.0f, 12.1070137f, 0.0f, 0.892986298f, 10.1070137f, - 12.1070137f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}), - std::vector{0, 1, 0, 0, 0}, - getValues({0.8f, 0.9f, 0.0f, 0.0f, 0.0f}), + + // out_boxes + getValues({ 0.0f, 0.892987f, 10.107f, 12.107f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f}), + + // out_classes + std::vector{1, 0, 0, 0, 0}, + + // out_scores + getValues({0.9f, 0.0f, 0.0f, 0.0f, 0.0f}), }, { 0.0500000007, // score_threshold @@ -274,12 +349,12 @@ std::vector> getExperimentalDetect }), // out_boxes - getValues({ - 0, 2.97829, 6.57812, 4.90234, 0, 4.90234, 6.57812, 4.90234, 4.37184, 4.90234, - 6.03075, 4.90234, 5.95093, 3.66966, 6.57812, 4.90234, 0, 4.90234, 6.57812, 4.90234, - 1.31075, 4.90234, 6.57812, 
4.90234, 3.24829, 4.90234, 6.57812, 4.90234, 0, 0, - 6.57812, 4.90234, 4.20346, 0, 6.57812, 4.90234, 0, 0, 6.57812, 4.90234, - }), + getValues({ 0.0f, 2.97829f, 14.8295f, 11.1221f, 0.0f, 6.29737f, 16.2088f, 16.3451f, + 4.37184f, 6.41816f, 6.03075f, 15.934f, 5.95092f, 3.66966f, 6.81878f, 16.9983f, + 0.0f, 5.64766f, 17.3085f, 12.3716f, 1.31074f, 9.12453f, 13.1104f, 10.6441f, + 3.24828f, 7.11447f, 9.16656f, 10.1058f, 0.0f, 0.0f, 10.0008f, 14.6173f, + 4.20346f, 0.0f, 8.5746f, 18.8736f, 0.0f, 0.0f, 15.661f, 22.4114f} + ), // out_classes std::vector({ diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp index 0153b4dc61f..14148c3b2e9 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_detection_output.cpp @@ -32,12 +32,16 @@ const std::vector num_classes = {2}; const std::vector post_nms_count = {5, 25}; // specifies maximual number of detections per image +// docs/template_plugin/backend/evaluates_map.cpp:2117 assigns size_t rois_num = attrs.max_detections_per_image, +// so max_detections_per_image has to be set equal to rois_num for this test suite +const std::vector max_detections_per_image16 = {16}; const std::vector max_detections_per_image = {5, 25}; // a flag specifies whether to delete background classes or not // `true` means background classes should be deleted, // `false` means background classes shouldn't be deleted. 
-const std::vector class_agnostic_box_regression = {true, false}; +const bool class_agnostic_box_regression_true = true; +const bool class_agnostic_box_regression_false = false; // specifies deltas of weights const std::vector> deltas_weights = {{10.0f, 10.0f, 5.0f, 5.0f}}; @@ -47,6 +51,37 @@ const std::vector> inputShapes = { static_shapes_to_test_representation({{16, 4}, {16, 8}, {16, 2}, {1, 3}}), }; + +INSTANTIATE_TEST_SUITE_P(smoke_ExperimentalDetectronDetectionOutput, + ExperimentalDetectronDetectionOutputLayerTest, + ::testing::Combine(::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(score_threshold), + ::testing::ValuesIn(nms_threshold), + ::testing::ValuesIn(max_delta_log_wh), + ::testing::ValuesIn(num_classes), + ::testing::ValuesIn(post_nms_count), + ::testing::ValuesIn(max_detections_per_image16), + ::testing::Values(class_agnostic_box_regression_true), + ::testing::ValuesIn(deltas_weights), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ExperimentalDetectronDetectionOutputLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutputMaxDetectionsPerImage, + ExperimentalDetectronDetectionOutputLayerTest, + ::testing::Combine(::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(score_threshold), + ::testing::ValuesIn(nms_threshold), + ::testing::ValuesIn(max_delta_log_wh), + ::testing::ValuesIn(num_classes), + ::testing::ValuesIn(post_nms_count), + ::testing::ValuesIn(max_detections_per_image), + ::testing::Values(class_agnostic_box_regression_true), + ::testing::ValuesIn(deltas_weights), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ExperimentalDetectronDetectionOutputLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutput, ExperimentalDetectronDetectionOutputLayerTest, ::testing::Combine(::testing::ValuesIn(inputShapes), @@ -56,7 +91,7 @@ 
INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ExperimentalDetectronDetectionOutput, ::testing::ValuesIn(num_classes), ::testing::ValuesIn(post_nms_count), ::testing::ValuesIn(max_detections_per_image), - ::testing::ValuesIn(class_agnostic_box_regression), + ::testing::Values(class_agnostic_box_regression_false), ::testing::ValuesIn(deltas_weights), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GPU)), diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp index 41b3b8c1cce..83274e4ab43 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp @@ -99,18 +99,34 @@ void ExperimentalDetectronDetectionOutputLayerTest::SetUp() { "ExperimentalDetectronDetectionOutput"); } -void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs(const std::vector& targetInputStaticShapes) { - static const std::vector inputTensors = { +namespace { + +template +std::vector getValues(const std::vector& values) { + std::vector result(values.begin(), values.end()); + return result; +} + +template +std::vector generateInputTensors() { + const auto netPrecision = ov::element::from(); + std::vector inputTensors = { // 16 x 4 = 64 - ov::test::utils::create_tensor(ov::element::f32, Shape{16, 4}, { + ov::test::utils::create_tensor( + netPrecision, + Shape{16, 4}, + getValues({ 1.0f, 1.0f, 10.0f, 10.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 8.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f - }), + })), // 16 x 8 - ov::test::utils::create_tensor(ov::element::f32, Shape{16, 8}, { + ov::test::utils::create_tensor( + netPrecision, + Shape{16, 8}, + getValues({ 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, @@ -120,16 +136,29 @@ void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs(const std::v 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f - }), + })), // 16 x 2 = 32 - ov::test::utils::create_tensor(ov::element::f32, Shape{16, 2}, { + ov::test::utils::create_tensor( + netPrecision, + Shape{16, 2}, + getValues({ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f - }), + })), // 1 x 3 = 3 - ov::test::utils::create_tensor(ov::element::f32, Shape{1, 3}, {1.0f, 1.0f, 1.0f}) - }; + ov::test::utils::create_tensor(netPrecision, Shape{1, 3}, getValues({1.0f, 1.0f, 1.0f}))}; + + return inputTensors; +} +} // namespace + +void ExperimentalDetectronDetectionOutputLayerTest::generate_inputs( + const std::vector& targetInputStaticShapes) { + const auto netPrecision = std::get<9>(GetParam()); + + const std::vector inputTensors = + (netPrecision == element::f16) ? generateInputTensors() : generateInputTensors(); inputs.clear(); const auto& funcInputs = function->inputs();