[GPU] Added TopK-11 operation support. (#18294)

* Added TopK-11 operation support.

* Created separate GPU test.
This commit is contained in:
Mykhailo Hnap 2023-07-21 23:36:43 +03:00 committed by GitHub
parent 0974cb10bc
commit 7f183f0e2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 233 additions and 43 deletions

View File

@ -252,6 +252,9 @@ REGISTER_FACTORY(v10, IsInf);
REGISTER_FACTORY(v10, IsNaN);
REGISTER_FACTORY(v10, Unique);
// ------------------------------ Supported v11 ops ----------------------------- //
REGISTER_FACTORY(v11, TopK);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);

View File

@ -24,7 +24,8 @@ struct arg_max_min : public primitive_base<arg_max_min> {
top_k(0),
axis(0),
sort(ov::op::TopKSortType::NONE),
values_first(false) {}
values_first(false),
stable(false) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
@ -35,6 +36,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
/// @param top_k Number of indices to output.
/// @param axis Axis to maximize/minimize along.
/// @param sort Type of sorting - by values or indices.
/// @param stable Controls whether sorting is stable.
arg_max_min(const primitive_id& id,
const std::vector<input_info>& inputs,
ov::op::TopKMode mode,
@ -42,6 +44,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
int64_t axis,
ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES,
bool values_first = false,
bool stable = false,
const padding& output_padding = padding(),
data_types output_data_type = data_types::f32,
const size_t num_outputs = 1)
@ -50,7 +53,8 @@ struct arg_max_min : public primitive_base<arg_max_min> {
top_k(top_k),
axis(axis),
sort(sort),
values_first(values_first) {}
values_first(values_first),
stable(stable) {}
/// @brief Constructs arg_max_min for top_k parameter
arg_max_min(const primitive_id& id,
@ -61,6 +65,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
int64_t axis,
ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES,
bool values_first = false,
bool stable = false,
const padding& output_padding = padding(),
data_types output_data_type = data_types::f32,
const size_t num_outputs = 1)
@ -69,7 +74,8 @@ struct arg_max_min : public primitive_base<arg_max_min> {
top_k(top_k),
axis(axis),
sort(sort),
values_first(values_first) {}
values_first(values_first),
stable(stable) {}
/// @brief Type of output - max or min.
ov::op::TopKMode mode;
@ -81,6 +87,8 @@ struct arg_max_min : public primitive_base<arg_max_min> {
ov::op::TopKSortType sort;
/// @brief Sets output order: if True than first output contains values and second (optional) - indices.
bool values_first;
/// @brief Specifies whether the equivalent elements should maintain their relative order from the input tensor during sorting.
bool stable;
size_t hash() const override {
size_t seed = primitive::hash();
@ -89,6 +97,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
seed = hash_combine(seed, axis);
seed = hash_combine(seed, sort);
seed = hash_combine(seed, values_first);
seed = hash_combine(seed, stable);
return seed;
}
@ -102,7 +111,8 @@ struct arg_max_min : public primitive_base<arg_max_min> {
top_k == rhs_casted.top_k &&
axis == rhs_casted.axis &&
sort == rhs_casted.sort &&
values_first == rhs_casted.values_first;
values_first == rhs_casted.values_first &&
stable == rhs_casted.stable;
}
size_t get_output_nums() const {
@ -120,6 +130,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
ob << axis;
ob << make_data(&sort, sizeof(ov::op::TopKSortType));
ob << values_first;
ob << stable;
}
void load(BinaryInputBuffer& ib) override {
@ -131,6 +142,7 @@ struct arg_max_min : public primitive_base<arg_max_min> {
ib >> axis;
ib >> make_data(&sort, sizeof(ov::op::TopKSortType));
ib >> values_first;
ib >> stable;
}
};
} // namespace cldnn

View File

@ -66,6 +66,7 @@ public:
const auto& mode = primitive->mode;
const auto& sort_type = primitive->sort;
const auto& values_first = primitive->values_first;
const auto& stable = primitive->stable;
const auto& outputs_num = primitive->input_size() == 3 ? 2 : static_cast<uint32_t>(primitive->output_size());
auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(impl_param, is_shape_agnostic);
@ -107,6 +108,7 @@ public:
}
argm_params.values_first = values_first;
argm_params.stable = stable;
return {argm_params, argm_optional_params};
}

View File

@ -21,6 +21,11 @@ JitConstants ArgMaxMinKernelBase::GetJitConstants(const arg_max_min_params& para
MakeJitConstant(toString(params.argMaxMinAxis) + "_AXIS", 1),
params.argMaxMinOut == ArgMaxMinOut::MAX ? MakeJitConstant("MAX_OUT", 1) : MakeJitConstant("MIN_OUT", 1)});
// For now, we don't use this constant in the kernel as sorting is always stable.
if (params.stable) {
jit.AddConstant(MakeJitConstant("STABLE", true));
}
return jit;
}

View File

@ -22,6 +22,7 @@ struct arg_max_min_params : public base_params {
bool values_first = false;
bool has_second_output = false;
bool use_multiple_outputs = false;
bool stable = false;
ParamsKey GetParamsKey() const override {
ParamsKey k = base_params::GetParamsKey();

View File

@ -26,7 +26,7 @@ void CreateExperimentalDetectronTopKROIsOp(Program &p,
auto argmax_layer_name = layer_name + "_topk";
auto top_k_indices = arg_max_min(argmax_layer_name,
{inputs[1]}, ov::op::TopKMode::MAX, max_rois, 0,
ov::op::TopKSortType::SORT_VALUES, false, cldnn::padding(), cldnn::data_types::i32);
ov::op::TopKSortType::SORT_VALUES, false, false, cldnn::padding(), cldnn::data_types::i32);
p.add_primitive(*op, top_k_indices);

View File

@ -14,17 +14,17 @@
namespace ov {
namespace intel_gpu {
static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
static void TopKImpl(Program& p,
const std::shared_ptr<ngraph::Node>& op,
ov::op::TopKMode mode,
ov::op::TopKSortType stype,
uint32_t top_k,
uint64_t chosen_axis,
bool stable = false) {
validate_inputs_count(op, {2});
auto inputs = p.GetInputInfo(op);
std::string layerName = layer_type_name_ID(op);
ov::op::TopKMode mode = op->get_mode();
ov::op::TopKSortType stype = op->get_sort_type();
uint32_t top_k = static_cast<uint32_t>(op->get_k());
uint64_t chosen_axis = op->get_axis();
if (p.use_new_shape_infer()) {
size_t num_outputs = op->get_output_size();
auto get_output_paddings = [&]() {
@ -51,6 +51,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
chosen_axis,
stype,
true,
stable,
cldnn::padding({0, 0, 0, 0}, 0),
cldnn::element_type_to_data_type(op->get_output_element_type(0)),
num_outputs);
@ -85,6 +86,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
chosen_axis,
stype,
true,
stable,
cldnn::padding({0, 0, 0, 0}, 0),
cldnn::element_type_to_data_type(op->get_output_element_type(0)));
@ -103,6 +105,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
chosen_axis,
stype,
true,
stable,
cldnn::padding({0, 0, 0, 0}, 0),
cldnn::element_type_to_data_type(op->get_output_element_type(0)));
@ -113,7 +116,16 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
}
}
// Translates a v1 TopK node into the common GPU TopK lowering path.
// v1 TopK has no 'stable' attribute, so TopKImpl's default (false) applies.
static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
    const auto k = static_cast<uint32_t>(op->get_k());
    TopKImpl(p, op, op->get_mode(), op->get_sort_type(), k, op->get_axis());
}
// Translates a v11 TopK node into the common GPU TopK lowering path,
// forwarding the v11-only 'stable' attribute.
static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v11::TopK>& op) {
    const auto k = static_cast<uint32_t>(op->get_k());
    TopKImpl(p, op, op->get_mode(), op->get_sort_type(), k, op->get_axis(), op->get_stable());
}
REGISTER_FACTORY_IMPL(v1, TopK);
REGISTER_FACTORY_IMPL(v11, TopK);
} // namespace intel_gpu
} // namespace ov

View File

@ -90,6 +90,7 @@
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/op_conversions/convert_shapeof3.hpp>
#include <transformations/op_conversions/convert_topk11_downgrade.hpp>
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
#include <low_precision/pull_reshape_through_dequantization.hpp>
@ -442,6 +443,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
pass_config->disable<ov::pass::ConvertShapeOf3>();
pass_config->disable<ov::pass::ConvertGather8ToGather7>();
pass_config->disable<ov::pass::ConvertGather7ToGather1>();
pass_config->disable<ov::pass::ConvertTopK11ToTopK3>();
pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();

View File

@ -2,17 +2,151 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <tuple>
#include <vector>
#include "single_layer_tests/topk.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
using namespace LayerTestsDefinitions;
namespace GPULayerTestsDefinitions {
// Parameter tuple for the GPU-specific TopK test.
// Mirrors the common TopK test parameters plus the v11-only 'stable' flag.
typedef std::tuple<int64_t, // keepK
int64_t, // axis
ov::op::TopKMode, // mode
ov::op::TopKSortType, // sort
bool, // stable
InferenceEngine::Precision, // Net precision
InferenceEngine::Precision, // Input precision
InferenceEngine::Precision, // Output precision
InferenceEngine::Layout, // Input layout
InferenceEngine::SizeVector, // inputShape
std::string // Target device name
>
TopKGPUParams;
// GPU-specific TopK layer test. Unlike the shared TopKLayerTest it also
// covers the TopK-11 'stable' attribute, carried through TopKGPUParams.
class TopKLayerTestGPU : public testing::WithParamInterface<TopKGPUParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
// Builds a human-readable test name from the parameter tuple.
static std::string getTestCaseName(const testing::TestParamInfo<TopKGPUParams>& obj);
// Generates input data; for stable sorting it deliberately repeats values.
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override;
protected:
void SetUp() override;
};
// Composes the parametrized test name from the tuple, e.g.
// "IS=(10.10.10)_k=1_axis=0_mode=..._sort=..._stable=1_..._trgDev=GPU".
std::string TopKLayerTestGPU::getTestCaseName(const testing::TestParamInfo<TopKGPUParams>& obj) {
int64_t topK;
int64_t topKAxis;
ov::op::TopKMode topKMode;
ov::op::TopKSortType sortType;
bool isStable;
InferenceEngine::Precision netPrc;
InferenceEngine::Precision inputPrc;
InferenceEngine::Precision outputPrc;
InferenceEngine::Layout layout;
InferenceEngine::SizeVector shape;
std::string device;
std::tie(topK, topKAxis, topKMode, sortType, isStable, netPrc, inputPrc, outputPrc, layout, shape, device) =
obj.param;

std::ostringstream name;
name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
     << "k=" << topK << "_"
     << "axis=" << topKAxis << "_"
     << "mode=" << topKMode << "_"
     << "sort=" << sortType << "_"
     << "stable=" << isStable << "_"
     << "netPRC=" << netPrc.name() << "_"
     << "inPRC=" << inputPrc.name() << "_"
     << "outPRC=" << outputPrc.name() << "_"
     << "inL=" << layout << "_"
     << "trgDev=" << device;
return name.str();
}
// Builds the test graph: Parameter -> TopK-11 -> Result(s).
// The 'stable' flag from the parameter tuple is forwarded to the v11 TopK op,
// which is the attribute this GPU-specific test exists to exercise.
void TopKLayerTestGPU::SetUp() {
InferenceEngine::SizeVector inputShape;
InferenceEngine::Precision netPrecision;
int64_t keepK, axis;
ov::op::TopKMode mode;
ov::op::TopKSortType sort;
bool stable;
std::tie(keepK, axis, mode, sort, stable, netPrecision, inPrc, outPrc, inLayout, inputShape, targetDevice) =
this->GetParam();

auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramIn = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

// Scalar i64 constant holding k; TopK takes it as a second input.
auto k = std::make_shared<ov::op::v0::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK);
// make_shared already yields shared_ptr<ov::op::v11::TopK>; the original
// dynamic_pointer_cast to the very same type was a redundant identity cast.
auto topk = std::make_shared<ov::op::v11::TopK>(paramIn[0], k, axis, mode, sort, ngraph::element::Type_t::i64, stable);

// Expose every TopK output (values and indices) as a graph result.
ngraph::ResultVector results;
for (size_t i = 0; i < topk->get_output_size(); i++) {
results.push_back(std::make_shared<ov::op::v0::Result>(topk->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "TopK");
}
// Generates input data for the TopK test.
// For stable sorting with SORT_VALUES the data deliberately contains every
// value twice so that ties exist and sort stability is observable; otherwise
// all values are unique. Only FP32/BF16/FP16 input precisions are supported.
InferenceEngine::Blob::Ptr TopKLayerTestGPU::GenerateInput(const InferenceEngine::InputInfo& info) const {
IE_ASSERT(InferenceEngine::Precision::FP32 == info.getTensorDesc().getPrecision() ||
InferenceEngine::Precision::BF16 == info.getTensorDesc().getPrecision() ||
InferenceEngine::Precision::FP16 == info.getTensorDesc().getPrecision());

// Only the sort type and the 'stable' flag influence data generation, so
// read just those two tuple members (indices 3 and 4 of TopKGPUParams)
// instead of unpacking all eleven into mostly-unused locals.
const auto& param = this->GetParam();
const ov::op::TopKSortType sort = std::get<3>(param);
const bool stable = std::get<4>(param);

InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc());
blob->allocate();

// For unstable sorting, generate unrepeated input data.
// While for stable sorting repeating values are explicitly set.
const size_t size = blob->size();
const int start = -static_cast<int>(size / 2);
std::vector<int> data(size);
const size_t set_size = sort == ov::op::TopKSortType::SORT_VALUES && stable ? size / 2 : size;
std::iota(data.begin(), data.begin() + set_size, start);
if (sort == ov::op::TopKSortType::SORT_VALUES && stable) {
// Duplicate the first half into the second half to create ties.
std::copy(data.begin(), data.begin() + set_size, data.begin() + set_size);
}
// Fixed seed keeps test inputs reproducible across runs.
std::mt19937 gen(0);
std::shuffle(data.begin(), data.end(), gen);

const float divisor = size / 10.0;
if (InferenceEngine::Precision::FP32 == info.getTensorDesc().getPrecision()) {
auto* rawBlobDataPtr = blob->buffer().as<float*>();
for (size_t i = 0; i < size; i++) {
rawBlobDataPtr[i] = static_cast<float>(data[i] / divisor);
}
} else if (InferenceEngine::Precision::BF16 == info.getTensorDesc().getPrecision()) {
auto* rawBlobDataPtr = blob->buffer().as<ngraph::bfloat16*>();
for (size_t i = 0; i < size; i++) {
rawBlobDataPtr[i] = static_cast<ngraph::bfloat16>(data[i] / divisor);
}
} else if (InferenceEngine::Precision::FP16 == info.getTensorDesc().getPrecision()) {
auto* rawBlobDataPtr = blob->buffer().as<ngraph::float16*>();
for (size_t i = 0; i < size; i++) {
rawBlobDataPtr[i] = static_cast<ngraph::float16>(data[i] / divisor);
}
}

return blob;
}
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
InferenceEngine::Precision::FP16,
};
const std::vector<int64_t> axes = {
@ -27,28 +161,38 @@ const std::vector<int64_t> k = {
10,
};
const std::vector<ngraph::opset4::TopK::Mode> modes = {
ngraph::opset4::TopK::Mode::MIN,
ngraph::opset4::TopK::Mode::MAX
const std::vector<ov::op::TopKMode> modes = {
ov::op::TopKMode::MIN,
ov::op::TopKMode::MAX,
};
const std::vector<ngraph::opset4::TopK::SortType> sortTypes = {
ngraph::opset4::TopK::SortType::SORT_INDICES,
ngraph::opset4::TopK::SortType::SORT_VALUES,
const std::vector<ov::op::TopKSortType> sortTypes = {
ov::op::TopKSortType::SORT_INDICES,
ov::op::TopKSortType::SORT_VALUES,
};
// Both unstable and stable sorting are exercised (TopK-11 'stable' attribute).
const std::vector<bool> stable = {
false,
true,
};
INSTANTIATE_TEST_SUITE_P(smoke_TopK, TopKLayerTest,
::testing::Combine(
::testing::ValuesIn(k),
// Runs inference on GPU and compares outputs against the reference results.
TEST_P(TopKLayerTestGPU, CompareWithRefs) {
Run();
}

// Full cross-product of k, axis, mode, sort type and the v11 'stable' flag
// on a 10x10x10 input for FP32/FP16 networks on the GPU device.
INSTANTIATE_TEST_SUITE_P(smoke_TopK,
TopKLayerTestGPU,
::testing::Combine(::testing::ValuesIn(k),
::testing::ValuesIn(axes),
::testing::ValuesIn(modes),
::testing::ValuesIn(sortTypes),
::testing::ValuesIn(stable),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({10, 10, 10})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
TopKLayerTestGPU::getTestCaseName);
} // namespace
} // namespace GPULayerTestsDefinitions

View File

@ -64,7 +64,7 @@ TEST_P(arg_max_min_test, shape_infer) {
auto arg_max_min_prim = std::make_shared<arg_max_min>("output", p.inputs.empty() ? input_prim_ids : p.inputs,
p.mode, p.top_k, p.axis,
ov::op::TopKSortType::SORT_VALUES, false, padding(),
ov::op::TopKSortType::SORT_VALUES, false, false, padding(),
p.output_data_type, p.num_outputs);
std::vector<padding> output_paddings;
std::vector<optional_data_type> output_data_types;

View File

@ -113,6 +113,7 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) {
0,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::i32));
@ -163,6 +164,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) {
2,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32));
@ -230,6 +232,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) {
0,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32));
@ -295,6 +298,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) {
2,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32));
@ -405,6 +409,7 @@ TEST(top_k_layer_tests, second_output2) {
0,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32));
@ -495,6 +500,7 @@ TEST(top_k_layer_tests, multiple_outputs) {
0,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32,
2);
@ -580,6 +586,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) {
2,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::f32));
@ -637,6 +644,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
2,
ov::op::TopKSortType::SORT_INDICES,
false,
false,
padding(),
data_types::f32));
@ -695,6 +703,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test)
3,
ov::op::TopKSortType::SORT_VALUES,
false,
false,
padding(),
data_types::i32));
std::vector<T> input_vec = {0.9f, 0.1f, 0.2f, 0.8f, 0.5f, 0.6f, 0.3f, 0.4f, 0.7f, 0.95f};

View File

@ -47,7 +47,7 @@ void TopKLayerTest::SetUp() {
std::make_shared<ngraph::opset4::TopK>(paramIn[0], k, axis, mode, sort));
ngraph::ResultVector results;
for (int i = 0; i < topk->get_output_size(); i++) {
for (size_t i = 0; i < topk->get_output_size(); i++) {
results.push_back(std::make_shared<ngraph::opset4::Result>(topk->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "TopK");