[GPU] Update TopK to support non const top_k input (#15429)
parent 4351c060f3
commit bd4d74d3dc
@@ -43,6 +43,25 @@ struct arg_max_min : public primitive_base<arg_max_min> {
           sort(sort),
           values_first(values_first) {}

+    /// @brief Constructs arg_max_min for top_k parameter
+    arg_max_min(const primitive_id& id,
+                const input_info& input,
+                const input_info& topk_id,
+                ov::op::TopKMode mode,
+                uint32_t top_k,
+                int64_t axis,
+                ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES,
+                bool values_first = false,
+                const padding& output_padding = padding(),
+                data_types output_data_type = data_types::f32,
+                const size_t num_outputs = 1)
+        : primitive_base(id, {input, topk_id}, {output_padding}, {optional_data_type{output_data_type}}, num_outputs),
+          mode(mode),
+          top_k(top_k),
+          axis(axis),
+          sort(sort),
+          values_first(values_first) {}
+
     /// @brief Type of output - max or min.
     ov::op::TopKMode mode;
     /// @brief Number of indices to output.
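Note: the new overload takes k as a second primitive input (topk_id), so the value can be produced at runtime instead of being fixed at build time. A minimal usage sketch, assuming hypothetical primitive ids "data" and "topk" that are not part of this commit:

    // Illustrative wiring of the two-input constructor; top_k = 0 is used
    // here to indicate that k is only known at runtime via the "topk" input.
    cldnn::arg_max_min argmax("argmax",
                              cldnn::input_info("data"),  // values to rank
                              cldnn::input_info("topk"),  // runtime k tensor
                              ov::op::TopKMode::MAX,
                              0,   // build-time top_k unknown
                              3);  // axis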
@@ -75,7 +75,8 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const
     auto input_layout = impl_param.get_input_layout();

     ov::op::v1::TopK op;
-    op.set_axis(input_layout.get<ShapeType>().rank(), desc->axis);
+    auto input_rank = input_layout.get<ShapeType>().rank();
+    op.set_axis(input_rank, desc->axis);
     op.set_mode(desc->mode);
     op.set_sort_type(desc->sort);

@@ -85,13 +86,24 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const
         ShapeType{}
     };

-    std::map<size_t, ngraph::HostTensorPtr> const_data;
-    auto topk = desc->top_k;
-    auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
-    const_data = { {1, top_k_tensor} };
-
-    ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
+    int64_t top_k = desc->top_k;
+    auto& constant_mem = impl_param.memory_deps;
+    if (desc->top_k > 0) {
+        auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, static_cast<void*>(&top_k));
+        std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {
+            {1, top_k_tensor}
+        };
+        ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
+    } else if (constant_mem.count(1)) {
+        std::map<size_t, ngraph::HostTensorPtr> const_data;
+        auto target_shape_mem = constant_mem.at(1);
+        cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.prog->get_stream());
+        const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
+
+        ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
+    } else {
+        output_shapes[0] = output_shapes[1] = ShapeType::dynamic(input_layout.get<ShapeType>().size());
+    }

     for (size_t i = 0; i < desc->num_outputs; ++i) {
         auto dt = desc->output_data_types[i].value_or(input_layout.data_type);
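Shape inference now dispatches three ways: a positive build-time top_k is wrapped in a host tensor and handed to the core shape_infer; otherwise, if the k input is available as a memory dependency, it is locked and read; failing both, the outputs fall back to fully dynamic shapes of the input rank. A one-line sketch of that fallback, assuming a rank-4 input:

    // Both TopK outputs keep the input rank but lose all dimension info:
    ov::PartialShape out = ov::PartialShape::dynamic(4);  // {?, ?, ?, ?}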
@@ -52,29 +52,41 @@ protected:
         kernel_arguments_data args = parent::get_arguments(instance);

         if (instance.node->has_second_output()) {
-            args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
+            if (args.inputs.size() > 1) {
+                args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
+            }
         }

         return args;
     }

 public:
-    static std::unique_ptr<primitive_impl> create(const arg_max_min_node& arg, const kernel_impl_params& impl_param) {
+    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
         const auto& primitive = impl_param.typed_desc<arg_max_min>();
         const auto& axis = primitive->axis;
         const auto& top_k = primitive->top_k;
         const auto& mode = primitive->mode;
         const auto& sort_type = primitive->sort;
         const auto& values_first = primitive->values_first;
-        const auto& outputs_num = arg.get_output_nums(); // second output passed as input for TOP_K layer
+        const auto& outputs_num = (primitive->input_size() == 3 ? 2 : primitive->output_size());

         auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(impl_param);
         auto argm_optional_params =
             get_default_optional_params<kernel_selector::arg_max_min_optional_params>(impl_param.get_program());

         argm_params.outputs_num = outputs_num;
-        argm_params.topK = top_k;
-        argm_params.argMaxMinAxis = GetArgMaxMinAxis(axis, arg.get_output_layout().get_rank());
+        argm_params.argMaxMinAxis = GetArgMaxMinAxis(axis, impl_param.get_output_layout().get_rank());
+
+        auto& constant_mem = impl_param.memory_deps;
+        if (constant_mem.count(1)) {
+            // The topK could be got by reading impl_param.memory_deps.at(1).
+            // However, here we utilize output_layout and axis information to minimize mem_lock.
+            auto output_layout = impl_param.get_output_layout(0);
+            auto out_dims = output_layout.get_dims();
+            argm_params.topK = out_dims[axis];
+        } else {
+            argm_params.topK = top_k;
+        }

         if (mode == ov::op::TopKMode::MAX)
             argm_params.argMaxMinOut = kernel_selector::argm_output::MAX;
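When k arrives through a memory dependency, the kernel's topK parameter is recovered from the already-inferred output layout rather than by locking and reading the k buffer, as the comment in the hunk notes. An illustration of the idea, with made-up shape values:

    // If the data input is {1, 3, 224, 224}, axis == 2 and k == 5, shape
    // inference has already produced an output layout of {1, 3, 5, 224}:
    auto out_dims = output_layout.get_dims();  // {1, 3, 5, 224}
    argm_params.topK = out_dims[axis];         // 5, no mem_lock required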
@@ -86,9 +98,9 @@ public:
         else
             argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX;

-        if (arg.has_second_output()) { // for backward compatibility
+        if (outputs_num == 2) { // for backward compatibility
             argm_params.has_second_output = true;
-            if (arg.use_multiple_outputs()) {
+            if (primitive->input_size() != 3) {
                 argm_params.use_multiple_outputs = true;
                 argm_params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1)));
             } else {
@@ -98,10 +110,7 @@ public:

         argm_params.values_first = values_first;

-        auto& kernel_selector = kernel_selector::arg_max_min_kernel_selector::Instance();
-        auto best_kernel = kernel_selector.get_best_kernel(argm_params, argm_optional_params);
-
-        return make_unique<arg_max_min_impl>(best_kernel);
+        return {argm_params, argm_optional_params};
     }
 };

@@ -119,7 +128,10 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() {

                                                      format::bfzyx};

-    implementation_map<arg_max_min>::add(impl_types::ocl, arg_max_min_impl::create, types, formats);
+    implementation_map<arg_max_min>::add(impl_types::ocl,
+                                         typed_primitive_impl_ocl<arg_max_min>::create<arg_max_min_impl>,
+                                         types,
+                                         formats);
 }
 }  // namespace detail
 }  // namespace ocl
@@ -41,16 +41,19 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
         }
         return output_data_types;
     };

+    auto topk_constant = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(1).get_node_shared_ptr());
     auto argmaxPrim = cldnn::arg_max_min(layerName,
-                                         inputs,
-                                         mode,
-                                         top_k,
-                                         chosen_axis,
-                                         stype,
-                                         true,
-                                         cldnn::padding({0, 0, 0, 0}, 0),
-                                         cldnn::element_type_to_data_type(op->get_output_element_type(0)),
-                                         num_outputs);
+                                         inputs[0],
+                                         inputs[1],
+                                         mode,
+                                         (topk_constant ? top_k : 0),
+                                         chosen_axis,
+                                         stype,
+                                         true,
+                                         cldnn::padding({0, 0, 0, 0}, 0),
+                                         cldnn::element_type_to_data_type(op->get_output_element_type(0)),
+                                         num_outputs);
     argmaxPrim.output_paddings = get_output_paddings();
     argmaxPrim.output_data_types = get_output_data_types();
     p.add_primitive(*op, argmaxPrim);
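On the plugin side, a TopK whose k input is not a Constant now maps to the two-input primitive, with 0 passed as the build-time k (topk_constant is null). A minimal sketch of a graph that exercises this path, assuming a scalar i64 Parameter for k:

    // Hypothetical ngraph snippet; k is a Parameter, so the non-const path is taken.
    auto data = std::make_shared<ngraph::opset4::Parameter>(
        ngraph::element::f32, ngraph::PartialShape::dynamic(4));
    auto k = std::make_shared<ngraph::opset4::Parameter>(ngraph::element::i64,
                                                         ngraph::Shape{});
    auto topk = std::make_shared<ngraph::opset4::TopK>(
        data, k, 3, ngraph::opset4::TopK::Mode::MAX,
        ngraph::opset4::TopK::SortType::SORT_VALUES);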
@@ -129,4 +129,4 @@ INSTANTIATE_TEST_SUITE_P(smoke, arg_max_min_test,
                         },
                 }));

-}  // shape_infer_tests
+}  // namespace shape_infer_tests
@@ -19,16 +19,16 @@ using namespace ov::test;
 namespace GPULayerTestsDefinitions {

 typedef std::tuple<
-        int64_t,                            // keepK
-        int64_t,                            // axis
-        ngraph::opset4::TopK::Mode,         // mode
-        ngraph::opset4::TopK::SortType,     // sort
-        ElementType,                        // Net precision
-        ElementType,                        // Input precision
-        ElementType,                        // Output precision
-        InputShape,                         // inputShape
-        TargetDevice,                       // Device name
-        std::map<std::string, std::string>  // Additional network configuration
+        int64_t,                            // keepK
+        int64_t,                            // axis
+        ngraph::opset4::TopK::Mode,         // mode
+        ngraph::opset4::TopK::SortType,     // sort
+        ElementType,                        // Net precision
+        ElementType,                        // Input precision
+        ElementType,                        // Output precision
+        InputShape,                         // inputShape
+        TargetDevice,                       // Device name
+        ngraph::helpers::InputLayerType     // Input type
 > TopKLayerTestParamsSet;

 class TopKLayerGPUTest : public testing::WithParamInterface<TopKLayerTestParamsSet>,
@@ -43,8 +43,8 @@ public:
         ElementType netPrecision, inPrc, outPrc;
         InputShape inputShape;
         TargetDevice targetDevice;
-        std::map<std::string, std::string> additionalConfig;
-        std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, additionalConfig) = basicParamsSet;
+        ngraph::helpers::InputLayerType inputType;
+        std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, inputType) = basicParamsSet;

         std::ostringstream result;
         result << "k=" << keepK << "_";
@@ -58,11 +58,8 @@ public:
         for (const auto& shape : inputShape.second) {
             result << CommonTestUtils::vec2str(shape) << "_";
         }
-        result << "config=(";
-        for (const auto& configEntry : additionalConfig) {
-            result << configEntry.first << ", " << configEntry.second << ":";
-        }
-        result << ")_";
+        result << "inputType=" << inputType;
         result << "TargetDevice=" << targetDevice;

         return result.str();
@@ -77,16 +74,29 @@ protected:
         ngraph::opset4::TopK::SortType sort;
         ElementType inPrc, outPrc;
         InputShape inputShape;
-        std::map<std::string, std::string> additionalConfig;
-        std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, additionalConfig) = basicParamsSet;
+        std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, inputType) = basicParamsSet;

-        init_input_shapes({inputShape});
+        if (inputType == ngraph::helpers::InputLayerType::CONSTANT) {
+            init_input_shapes({inputShape});
+        } else {
+            inputDynamicShapes = {inputShape.first, {}};
+            for (size_t i = 0; i < inputShape.second.size(); ++i) {
+                targetStaticShapes.push_back({inputShape.second[i], {}});
+            }
+        }

         auto params = ngraph::builder::makeDynamicParams(netPrecision, {inputDynamicShapes[0]});

         std::shared_ptr<ngraph::opset4::TopK> topk;
-        auto k = std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK);
-        topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
+        if (inputType == ngraph::helpers::InputLayerType::CONSTANT) {
+            auto k = std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK);
+            topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
+        } else {
+            auto k = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::Type_t::i64, inputDynamicShapes[1]);
+            params.push_back(k);
+            topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(
+                std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
+        }

         ngraph::ResultVector results;
         for (size_t i = 0; i < topk->get_output_size(); i++) {
@@ -104,60 +114,41 @@ protected:
             tensor = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(), shape);
             size_t size = tensor.get_size();

-            if (netPrecision == ElementType::f32 || netPrecision == ElementType::i32) {
+            if (netPrecision == ElementType::f32) {
                 std::vector<int> data(size);

-                // For int32, deliberately set big numbers which are not accurately representable in fp32
-                int start = netPrecision == ElementType::i32 ? pow(2, 30) + 1 : - static_cast<int>(size / 2);
+                int start = - static_cast<int>(size / 2);
                 std::iota(data.begin(), data.end(), start);
                 std::mt19937 gen(0);
                 std::shuffle(data.begin(), data.end(), gen);

-                if (netPrecision == ElementType::f32) {
-                    auto *rawBlobDataPtr = static_cast<float *>(tensor.data());
-                    for (size_t i = 0; i < size; ++i) {
-                        rawBlobDataPtr[i] = static_cast<float>(data[i]);
-                    }
-                } else {
-                    auto *rawBlobDataPtr = static_cast<int32_t *>(tensor.data());
-                    for (size_t i = 0; i < size; ++i) {
-                        rawBlobDataPtr[i] = static_cast<int32_t>(data[i]);
-                    }
-                }
-            } else if (netPrecision == ElementType::bf16) {
-                size_t O = 1, A = 1, I = 1;
-                A = shape[axis];
-                for (size_t i = 0; i < axis; i++)
-                    O *= shape[i];
-                for (size_t i = axis + 1; i < shape.size(); i++)
-                    I *= shape[i];
-                if (O * A * I != size)
-                    FAIL() << "Incorrect blob shape " << shape;
-
-                auto *rawBlobDataPtr = static_cast<ngraph::bfloat16 *>(tensor.data());
-                for (size_t o = 0; o < O; o++) {
-                    for (size_t i = 0; i < I; i++) {
-                        std::vector<int> data(A);
-                        int start = - static_cast<int>(A / 2);
-                        std::iota(data.begin(), data.end(), start);
-                        const size_t seed = (o + 1) * (i + 1);
-                        std::mt19937 gen(seed);
-                        std::shuffle(data.begin(), data.end(), gen);
-                        for (size_t a = 0; a < A; a++) {
-                            rawBlobDataPtr[o * A * I + a * I + i] = static_cast<ngraph::bfloat16>(data[a]);
-                        }
-                    }
-                }
+                auto *rawBlobDataPtr = static_cast<float *>(tensor.data());
+                for (size_t i = 0; i < size; ++i) {
+                    rawBlobDataPtr[i] = static_cast<float>(data[i]);
+                }
             } else {
                 FAIL() << "generate_inputs for " << netPrecision << " precision isn't supported";
             }
             inputs.insert({funcInputs[0].get_node_shared_ptr(), tensor});
+
+            if (inputType == ngraph::helpers::InputLayerType::PARAMETER) {
+                const auto& kPrecision = funcInputs[1].get_element_type();
+                const auto& kShape = targetInputStaticShapes[1];
+
+                const size_t startFrom = 1;
+                const size_t range = targetInputStaticShapes[0][axis];
+                const size_t seed = inferRequestNum++;
+                const auto kTensor = ov::test::utils::create_and_fill_tensor(kPrecision, kShape, range, startFrom, 1, seed);
+
+                inputs.insert({funcInputs[1].get_node_shared_ptr(), kTensor});
+            }
         }

 private:
     int64_t axis;
+    size_t inferRequestNum = 0;
     ElementType netPrecision;
     bool staticShape;
+    ngraph::helpers::InputLayerType inputType;
 };

 TEST_P(TopKLayerGPUTest, CompareWithRefs) {
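In the PARAMETER flavour of the test, each inference request receives a fresh scalar k drawn from [1, dim(axis)], with the seed advanced by inferRequestNum so consecutive requests exercise different k values. An equivalent hand-rolled fill, assuming the seed/range names from the hunk:

    // Sketch of what the seeded create_and_fill_tensor call amounts to:
    std::mt19937 gen(seed);                                  // seed = inferRequestNum++
    std::uniform_int_distribution<int64_t> dist(1, range);   // range = dim(axis)
    int64_t k_value = dist(gen);  // written into the scalar k input tensor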
@@ -168,14 +159,12 @@ TEST_P(TopKLayerGPUTest, CompareWithRefs) {

 namespace {

-std::map<std::string, std::string> emptyAdditionalConfig;
-
 const std::vector<ElementType> netPrecisions = {
     ElementType::f32,
 };

-const std::vector<int64_t> axes = {0, 1, 2, 3};
-const std::vector<int64_t> k = {1, 5, 7, 18, 21};
+const std::vector<int64_t> axes = {0, 3};
+const std::vector<int64_t> k = {3, 5, 7};

 const std::vector<ngraph::opset4::TopK::Mode> modes = {
     ngraph::opset4::TopK::Mode::MIN,
@@ -189,12 +178,12 @@ const std::vector<ngraph::opset4::TopK::SortType> sortTypes = {

 std::vector<ov::test::InputShape> inputShapesDynamic = {
     {
-        {{21, {20, 25}, 21, {20, 25}}, {{21, 21, 21, 21}, {21, 22, 21, 23}}},
-        {ov::PartialShape::dynamic(4), {{21, 21, 21, 21}, {21, 22, 21, 23}}}
+        {ov::PartialShape::dynamic(4), {{7, 7, 7, 7}, {7, 8, 7, 9}}},
+        {{-1, -1, -1, -1}, {{8, 9, 10, 11}, {11, 7, 8, 9}}}
     }
 };

-INSTANTIATE_TEST_CASE_P(smoke_TopK_dynamic, TopKLayerGPUTest,
+INSTANTIATE_TEST_CASE_P(smoke_TopK_constant_dynamic, TopKLayerGPUTest,
         ::testing::Combine(
             ::testing::ValuesIn(k),
             ::testing::ValuesIn(axes),
@@ -205,7 +194,21 @@ INSTANTIATE_TEST_CASE_P(smoke_TopK_dynamic, TopKLayerGPUTest,
             ::testing::Values(ElementType::undefined),
             ::testing::ValuesIn(inputShapesDynamic),
             ::testing::Values(CommonTestUtils::DEVICE_GPU),
-            ::testing::Values(emptyAdditionalConfig)),
+            ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT)),
         TopKLayerGPUTest::getTestCaseName);

+INSTANTIATE_TEST_CASE_P(smoke_TopK_parameter_dynamic, TopKLayerGPUTest,
+        ::testing::Combine(
+            ::testing::Values(1),
+            ::testing::ValuesIn(axes),
+            ::testing::ValuesIn(modes),
+            ::testing::ValuesIn(sortTypes),
+            ::testing::ValuesIn(netPrecisions),
+            ::testing::Values(ElementType::undefined),
+            ::testing::Values(ElementType::undefined),
+            ::testing::ValuesIn(inputShapesDynamic),
+            ::testing::Values(CommonTestUtils::DEVICE_GPU),
+            ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER)),
+        TopKLayerGPUTest::getTestCaseName);
+
 } // namespace