diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 13a474da81b..c036421ea98 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -252,6 +252,9 @@ REGISTER_FACTORY(v10, IsInf); REGISTER_FACTORY(v10, IsNaN); REGISTER_FACTORY(v10, Unique); +// ------------------------------ Supported v11 ops ----------------------------- // +REGISTER_FACTORY(v11, TopK); + // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); REGISTER_FACTORY(internal, GenerateProposalsIEInternal); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp index ad6acc5da07..16ca38e746e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/arg_max_min.hpp @@ -24,7 +24,8 @@ struct arg_max_min : public primitive_base { top_k(0), axis(0), sort(ov::op::TopKSortType::NONE), - values_first(false) {} + values_first(false), + stable(false) {} DECLARE_OBJECT_TYPE_SERIALIZATION @@ -35,6 +36,7 @@ struct arg_max_min : public primitive_base { /// @param top_k Number of indices to output. /// @param axis Axis to maximize/minimize along. /// @param sort Type of sorting - by values or indices. + /// @param stable Controls whether sorting is stable. 
arg_max_min(const primitive_id& id, const std::vector& inputs, ov::op::TopKMode mode, @@ -42,6 +44,7 @@ struct arg_max_min : public primitive_base { int64_t axis, ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES, bool values_first = false, + bool stable = false, const padding& output_padding = padding(), data_types output_data_type = data_types::f32, const size_t num_outputs = 1) @@ -50,7 +53,8 @@ struct arg_max_min : public primitive_base { top_k(top_k), axis(axis), sort(sort), - values_first(values_first) {} + values_first(values_first), + stable(stable) {} /// @brief Constructs arg_max_min for top_k parameter arg_max_min(const primitive_id& id, @@ -61,6 +65,7 @@ struct arg_max_min : public primitive_base { int64_t axis, ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES, bool values_first = false, + bool stable = false, const padding& output_padding = padding(), data_types output_data_type = data_types::f32, const size_t num_outputs = 1) @@ -69,7 +74,8 @@ struct arg_max_min : public primitive_base { top_k(top_k), axis(axis), sort(sort), - values_first(values_first) {} + values_first(values_first), + stable(stable) {} /// @brief Type of output - max or min. ov::op::TopKMode mode; @@ -81,6 +87,8 @@ struct arg_max_min : public primitive_base { ov::op::TopKSortType sort; /// @brief Sets output order: if True than first output contains values and second (optional) - indices. bool values_first; + /// @brief Specifies whether the equivalent elements should maintain their relative order from the input tensor during sorting. 
+ bool stable; size_t hash() const override { size_t seed = primitive::hash(); @@ -89,6 +97,7 @@ struct arg_max_min : public primitive_base { seed = hash_combine(seed, axis); seed = hash_combine(seed, sort); seed = hash_combine(seed, values_first); + seed = hash_combine(seed, stable); return seed; } @@ -102,7 +111,8 @@ struct arg_max_min : public primitive_base { top_k == rhs_casted.top_k && axis == rhs_casted.axis && sort == rhs_casted.sort && - values_first == rhs_casted.values_first; + values_first == rhs_casted.values_first && + stable == rhs_casted.stable; } size_t get_output_nums() const { @@ -120,6 +130,7 @@ struct arg_max_min : public primitive_base { ob << axis; ob << make_data(&sort, sizeof(ov::op::TopKSortType)); ob << values_first; + ob << stable; } void load(BinaryInputBuffer& ib) override { @@ -131,6 +142,7 @@ struct arg_max_min : public primitive_base { ib >> axis; ib >> make_data(&sort, sizeof(ov::op::TopKSortType)); ib >> values_first; + ib >> stable; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp index fd069138083..8eb30833756 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/arg_max_min.cpp @@ -66,6 +66,7 @@ public: const auto& mode = primitive->mode; const auto& sort_type = primitive->sort; const auto& values_first = primitive->values_first; + const auto& stable = primitive->stable; const auto& outputs_num = primitive->input_size() == 3 ? 
2 : static_cast(primitive->output_size()); auto argm_params = get_default_params(impl_param, is_shape_agnostic); @@ -107,6 +108,7 @@ public: } argm_params.values_first = values_first; + argm_params.stable = stable; return {argm_params, argm_optional_params}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.cpp index 9354d5f0d9d..976ef67d9ac 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.cpp @@ -21,6 +21,11 @@ JitConstants ArgMaxMinKernelBase::GetJitConstants(const arg_max_min_params& para MakeJitConstant(toString(params.argMaxMinAxis) + "_AXIS", 1), params.argMaxMinOut == ArgMaxMinOut::MAX ? MakeJitConstant("MAX_OUT", 1) : MakeJitConstant("MIN_OUT", 1)}); + // For now, we don't use this constant in the kernel as sorting is always stable. 
+ if (params.stable) { + jit.AddConstant(MakeJitConstant("STABLE", true)); + } + return jit; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.h index abf8bcf37e1..97423a75c7f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/arg_max_min/arg_max_min_kernel_base.h @@ -22,6 +22,7 @@ struct arg_max_min_params : public base_params { bool values_first = false; bool has_second_output = false; bool use_multiple_outputs = false; + bool stable = false; ParamsKey GetParamsKey() const override { ParamsKey k = base_params::GetParamsKey(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp index e76d796df47..d5b9e595420 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp @@ -26,7 +26,7 @@ void CreateExperimentalDetectronTopKROIsOp(Program &p, auto argmax_layer_name = layer_name + "_topk"; auto top_k_indices = arg_max_min(argmax_layer_name, {inputs[1]}, ov::op::TopKMode::MAX, max_rois, 0, - ov::op::TopKSortType::SORT_VALUES, false, cldnn::padding(), cldnn::data_types::i32); + ov::op::TopKSortType::SORT_VALUES, false, false, cldnn::padding(), cldnn::data_types::i32); p.add_primitive(*op, top_k_indices); diff --git a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp index ace774c429f..735be8864db 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp @@ -14,17 +14,17 @@ namespace ov { namespace intel_gpu { -static void CreateTopKOp(Program& p, const std::shared_ptr& op) { +static void TopKImpl(Program& p, + const 
std::shared_ptr& op, + ov::op::TopKMode mode, + ov::op::TopKSortType stype, + uint32_t top_k, + uint64_t chosen_axis, + bool stable = false) { validate_inputs_count(op, {2}); auto inputs = p.GetInputInfo(op); std::string layerName = layer_type_name_ID(op); - ov::op::TopKMode mode = op->get_mode(); - ov::op::TopKSortType stype = op->get_sort_type(); - - uint32_t top_k = static_cast(op->get_k()); - uint64_t chosen_axis = op->get_axis(); - if (p.use_new_shape_infer()) { size_t num_outputs = op->get_output_size(); auto get_output_paddings = [&]() { @@ -51,6 +51,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr chosen_axis, stype, true, + stable, cldnn::padding({0, 0, 0, 0}, 0), cldnn::element_type_to_data_type(op->get_output_element_type(0)), num_outputs); @@ -85,6 +86,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr chosen_axis, stype, true, + stable, cldnn::padding({0, 0, 0, 0}, 0), cldnn::element_type_to_data_type(op->get_output_element_type(0))); @@ -103,6 +105,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr chosen_axis, stype, true, + stable, cldnn::padding({0, 0, 0, 0}, 0), cldnn::element_type_to_data_type(op->get_output_element_type(0))); @@ -113,7 +116,16 @@ static void CreateTopKOp(Program& p, const std::shared_ptr } } +static void CreateTopKOp(Program& p, const std::shared_ptr& op) { + TopKImpl(p, op, op->get_mode(), op->get_sort_type(), static_cast(op->get_k()), op->get_axis()); +} + +static void CreateTopKOp(Program& p, const std::shared_ptr& op) { + TopKImpl(p, op, op->get_mode(), op->get_sort_type(), static_cast(op->get_k()), op->get_axis(), op->get_stable()); +} + REGISTER_FACTORY_IMPL(v1, TopK); +REGISTER_FACTORY_IMPL(v11, TopK); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index d836b8922f0..be064fa5e07 100644 --- 
a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -442,6 +443,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->disable(); pass_config->disable(); pass_config->disable(); + pass_config->disable(); pass_config->enable(); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp index 638ad5437e6..0d0e7145b1c 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp @@ -2,53 +2,197 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include #include -#include "single_layer_tests/topk.hpp" +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" -using namespace LayerTestsDefinitions; +namespace GPULayerTestsDefinitions { + +typedef std::tuple + TopKGPUParams; + +class TopKLayerTestGPU : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override; + +protected: + void SetUp() override; +}; + +std::string TopKLayerTestGPU::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + InferenceEngine::Layout inLayout; + InferenceEngine::SizeVector inputShape; + std::string targetDevice; + int64_t keepK, axis; + ov::op::TopKMode mode; + ov::op::TopKSortType sort; + bool stable; + std::tie(keepK, axis, mode, sort, stable, netPrecision, inPrc, outPrc, inLayout, inputShape, targetDevice) 
= + obj.param; + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; + result << "k=" << keepK << "_"; + result << "axis=" << axis << "_"; + result << "mode=" << mode << "_"; + result << "sort=" << sort << "_"; + result << "stable=" << stable << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "inPRC=" << inPrc.name() << "_"; + result << "outPRC=" << outPrc.name() << "_"; + result << "inL=" << inLayout << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void TopKLayerTestGPU::SetUp() { + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + int64_t keepK, axis; + ov::op::TopKMode mode; + ov::op::TopKSortType sort; + bool stable; + std::tie(keepK, axis, mode, sort, stable, netPrecision, inPrc, outPrc, inLayout, inputShape, targetDevice) = + this->GetParam(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramIn = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + auto k = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK); + auto topk = std::dynamic_pointer_cast( + std::make_shared(paramIn[0], k, axis, mode, sort, ngraph::element::Type_t::i64, stable)); + + ngraph::ResultVector results; + for (size_t i = 0; i < topk->get_output_size(); i++) { + results.push_back(std::make_shared(topk->output(i))); + } + function = std::make_shared(results, params, "TopK"); +} + +InferenceEngine::Blob::Ptr TopKLayerTestGPU::GenerateInput(const InferenceEngine::InputInfo& info) const { + IE_ASSERT(InferenceEngine::Precision::FP32 == info.getTensorDesc().getPrecision() || + InferenceEngine::Precision::BF16 == info.getTensorDesc().getPrecision() || + InferenceEngine::Precision::FP16 == info.getTensorDesc().getPrecision()); + + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + 
InferenceEngine::Layout inLayout; + InferenceEngine::SizeVector inputShape; + std::string targetDevice; + int64_t keepK, axis; + ov::op::TopKMode mode; + ov::op::TopKSortType sort; + bool stable; + std::tie(keepK, axis, mode, sort, stable, netPrecision, inPrc, outPrc, inLayout, inputShape, targetDevice) = + this->GetParam(); + + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + // Unless the sort is stable and by values, generate unique (non-repeating) input data. + // For a stable sort by values, the first half of the data is duplicated to produce repeated values. + + size_t size = blob->size(); + int start = -static_cast(size / 2); + std::vector data(size); + size_t set_size = sort == ov::op::TopKSortType::SORT_VALUES && stable ? size / 2 : size; + std::iota(data.begin(), data.begin() + set_size, start); + if (sort == ov::op::TopKSortType::SORT_VALUES && stable) { + std::copy(data.begin(), data.begin() + set_size, data.begin() + set_size); + } + std::mt19937 gen(0); + std::shuffle(data.begin(), data.end(), gen); + + float divisor = size / 10.0; + if (InferenceEngine::Precision::FP32 == info.getTensorDesc().getPrecision()) { + auto* rawBlobDataPtr = blob->buffer().as(); + for (size_t i = 0; i < size; i++) { + rawBlobDataPtr[i] = static_cast(data[i] / divisor); + } + } else if (InferenceEngine::Precision::BF16 == info.getTensorDesc().getPrecision()) { + auto* rawBlobDataPtr = blob->buffer().as(); + for (size_t i = 0; i < size; i++) { + rawBlobDataPtr[i] = static_cast(data[i] / divisor); + } + } else if (InferenceEngine::Precision::FP16 == info.getTensorDesc().getPrecision()) { + auto* rawBlobDataPtr = blob->buffer().as(); + for (size_t i = 0; i < size; i++) { + rawBlobDataPtr[i] = static_cast(data[i] / divisor); + } + } + + return blob; +} namespace { const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, }; const std::vector axes = { - 0, - 1, - 2, + 0, 
+ 1, + 2, }; const std::vector k = { - 1, - 5, - 10, + 1, + 5, + 10, }; -const std::vector modes = { - ngraph::opset4::TopK::Mode::MIN, - ngraph::opset4::TopK::Mode::MAX +const std::vector modes = { + ov::op::TopKMode::MIN, + ov::op::TopKMode::MAX, }; -const std::vector sortTypes = { - ngraph::opset4::TopK::SortType::SORT_INDICES, - ngraph::opset4::TopK::SortType::SORT_VALUES, +const std::vector sortTypes = { + ov::op::TopKSortType::SORT_INDICES, + ov::op::TopKSortType::SORT_VALUES, }; +const std::vector stable = { + false, + true, +}; -INSTANTIATE_TEST_SUITE_P(smoke_TopK, TopKLayerTest, - ::testing::Combine( - ::testing::ValuesIn(k), - ::testing::ValuesIn(axes), - ::testing::ValuesIn(modes), - ::testing::ValuesIn(sortTypes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({10, 10, 10})), - ::testing::Values(CommonTestUtils::DEVICE_GPU)), - TopKLayerTest::getTestCaseName); +TEST_P(TopKLayerTestGPU, CompareWithRefs) { + Run(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK, + TopKLayerTestGPU, + ::testing::Combine(::testing::ValuesIn(k), + ::testing::ValuesIn(axes), + ::testing::ValuesIn(modes), + ::testing::ValuesIn(sortTypes), + ::testing::ValuesIn(stable), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({10, 10, 10})), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + TopKLayerTestGPU::getTestCaseName); } // namespace +} // namespace GPULayerTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/arg_max_min_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/arg_max_min_si_test.cpp index 778b7b8186c..4d8ea80f7e3 100644 --- 
a/src/plugins/intel_gpu/tests/unit/shape_infer/arg_max_min_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/arg_max_min_si_test.cpp @@ -64,7 +64,7 @@ TEST_P(arg_max_min_test, shape_infer) { auto arg_max_min_prim = std::make_shared("output", p.inputs.empty() ? input_prim_ids : p.inputs, p.mode, p.top_k, p.axis, - ov::op::TopKSortType::SORT_VALUES, false, padding(), + ov::op::TopKSortType::SORT_VALUES, false, false, padding(), p.output_data_type, p.num_outputs); std::vector output_paddings; std::vector output_data_types; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/arg_max_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/arg_max_gpu_test.cpp index 0b5d5db256b..c665314ab8f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/arg_max_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/arg_max_gpu_test.cpp @@ -113,6 +113,7 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) { 0, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::i32)); @@ -163,6 +164,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) { 2, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32)); @@ -230,6 +232,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) { 0, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32)); @@ -295,6 +298,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) { 2, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32)); @@ -405,6 +409,7 @@ TEST(top_k_layer_tests, second_output2) { 0, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32)); @@ -495,6 +500,7 @@ TEST(top_k_layer_tests, multiple_outputs) { 0, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32, 2); @@ -580,6 +586,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) { 2, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::f32)); @@ -637,6 +644,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) { 
2, ov::op::TopKSortType::SORT_INDICES, false, + false, padding(), data_types::f32)); @@ -695,6 +703,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test) 3, ov::op::TopKSortType::SORT_VALUES, false, + false, padding(), data_types::i32)); std::vector input_vec = {0.9f, 0.1f, 0.2f, 0.8f, 0.5f, 0.6f, 0.3f, 0.4f, 0.7f, 0.95f}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp b/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp index f4dd967b404..994180d2470 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp @@ -47,7 +47,7 @@ void TopKLayerTest::SetUp() { std::make_shared(paramIn[0], k, axis, mode, sort)); ngraph::ResultVector results; - for (int i = 0; i < topk->get_output_size(); i++) { + for (size_t i = 0; i < topk->get_output_size(); i++) { results.push_back(std::make_shared(topk->output(i))); } function = std::make_shared(results, params, "TopK");