[GPU] Update TopK to support non const top_k input (#15429)

Kelvin Choi 2023-02-15 06:51:24 +09:00 committed by GitHub
parent 4351c060f3
commit bd4d74d3dc
6 changed files with 145 additions and 96 deletions

View File

@@ -43,6 +43,25 @@ struct arg_max_min : public primitive_base<arg_max_min> {
sort(sort),
values_first(values_first) {}
/// @brief Constructs arg_max_min with an additional topk_id input for a non-constant top_k.
arg_max_min(const primitive_id& id,
const input_info& input,
const input_info& topk_id,
ov::op::TopKMode mode,
uint32_t top_k,
int64_t axis,
ov::op::TopKSortType sort = ov::op::TopKSortType::SORT_VALUES,
bool values_first = false,
const padding& output_padding = padding(),
data_types output_data_type = data_types::f32,
const size_t num_outputs = 1)
: primitive_base(id, {input, topk_id}, {output_padding}, {optional_data_type{output_data_type}}, num_outputs),
mode(mode),
top_k(top_k),
axis(axis),
sort(sort),
values_first(values_first) {}
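
For context, a minimal standalone sketch of the convention this overload introduces (plain C++ with hypothetical names, not the actual cldnn API): the primitive either carries a compile-time K or defers to a second input, with top_k == 0 acting as the resolve-at-runtime sentinel that CreateTopKOp passes further below.

#include <cstdint>
#include <iostream>
#include <optional>

// Hypothetical, simplified model of an arg_max_min-like descriptor:
// top_k == 0 means "K is provided by a second input at runtime".
struct TopKDesc {
    uint32_t top_k = 0;        // compile-time K, 0 if unknown
    bool has_k_input = false;  // true when a topk_id input is attached
};

std::optional<uint32_t> compile_time_k(const TopKDesc& d) {
    if (d.top_k > 0)
        return d.top_k;        // K is known at build time
    return std::nullopt;       // K must be read from the runtime tensor
}

int main() {
    TopKDesc constant_k{5, false};
    TopKDesc runtime_k{0, true};
    std::cout << compile_time_k(constant_k).value_or(0) << "\n";  // prints 5
    std::cout << compile_time_k(runtime_k).has_value() << "\n";   // prints 0
}
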
/// @brief Type of output - max or min.
ov::op::TopKMode mode;
/// @brief Number of indices to output.

View File

@@ -75,7 +75,8 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const
auto input_layout = impl_param.get_input_layout();
ov::op::v1::TopK op;
op.set_axis(input_layout.get<ShapeType>().rank(), desc->axis);
auto input_rank = input_layout.get<ShapeType>().rank();
op.set_axis(input_rank, desc->axis);
op.set_mode(desc->mode);
op.set_sort_type(desc->sort);
@@ -85,13 +86,24 @@ std::vector<layout> arg_max_min_inst::calc_output_layouts(arg_max_min_node const
ShapeType{}
};
int64_t top_k = desc->top_k;
auto& constant_mem = impl_param.memory_deps;
if (desc->top_k > 0) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
auto topk = desc->top_k;
auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::u32, ov::Shape{1}, static_cast<void*>(&topk));
const_data = { {1, top_k_tensor} };
auto top_k_tensor = std::make_shared<ngraph::runtime::HostTensor>(ov::element::i64, ov::Shape{1}, static_cast<void*>(&top_k));
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> const_data = {
{1, top_k_tensor}
};
ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
} else if (constant_mem.count(1)) {
std::map<size_t, ngraph::HostTensorPtr> const_data;
auto target_shape_mem = constant_mem.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> target_shape_lock(target_shape_mem, impl_param.prog->get_stream());
const_data.emplace(1, make_host_tensor(target_shape_mem->get_layout(), target_shape_lock.data()));
ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data);
} else {
output_shapes[0] = output_shapes[1] = ShapeType::dynamic(input_layout.get<ShapeType>().size());
}
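
To make the three-way fallback above easier to follow, here is a simplified standalone sketch (std types only; Shape and the helper are illustrative stand-ins, not the cldnn types): prefer the compile-time K, otherwise take K from the constant-memory dependency when it is available, otherwise leave the TopK dimension dynamic.

#include <cstdint>
#include <map>
#include <vector>

// Illustrative stand-in for a shape with a possibly unknown dimension.
struct Shape {
    std::vector<int64_t> dims;
    bool has_dynamic_dim = false;
};

// Decide the TopK output extent along axis:
//  1. compile-time K from the primitive, if > 0
//  2. K read from the runtime/constant dependency, if it has been produced
//  3. otherwise the dimension stays dynamic until execution
Shape infer_topk_output(Shape input,
                        size_t axis,
                        uint32_t compile_time_k,
                        const std::map<size_t, int64_t>& memory_deps) {
    if (compile_time_k > 0) {
        input.dims[axis] = compile_time_k;
    } else if (auto it = memory_deps.find(1); it != memory_deps.end()) {
        input.dims[axis] = it->second;   // value read from the K tensor
    } else {
        input.dims[axis] = -1;           // unknown
        input.has_dynamic_dim = true;
    }
    return input;
}
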
for (size_t i = 0; i < desc->num_outputs; ++i) {
auto dt = desc->output_data_types[i].value_or(input_layout.data_type);

View File

@@ -52,29 +52,41 @@ protected:
kernel_arguments_data args = parent::get_arguments(instance);
if (instance.node->has_second_output()) {
args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
if (args.inputs.size() > 1) {
args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
}
}
return args;
}
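
A small sketch of the argument pruning above (std::vector stand-in, not the cldnn kernel_arguments_data type): when K arrives as an extra primitive input it is not a kernel argument, so it is dropped before dispatch only if it is actually present.

#include <vector>

// Drop the K-carrying input from the kernel argument list (illustrative helper;
// the real code erases from kernel_arguments_data::inputs).
template <typename T>
void drop_topk_input(std::vector<T>& inputs) {
    if (inputs.size() > 1)
        inputs.erase(inputs.begin() + 1);  // second input carries K, not data
}
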
public:
static std::unique_ptr<primitive_impl> create(const arg_max_min_node& arg, const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
const auto& primitive = impl_param.typed_desc<arg_max_min>();
const auto& axis = primitive->axis;
const auto& top_k = primitive->top_k;
const auto& mode = primitive->mode;
const auto& sort_type = primitive->sort;
const auto& values_first = primitive->values_first;
const auto& outputs_num = arg.get_output_nums(); // second output passed as input for TOP_K layer
const auto& outputs_num = (primitive->input_size() == 3 ? 2 : primitive->output_size());
auto argm_params = get_default_params<kernel_selector::arg_max_min_params>(impl_param);
auto argm_optional_params =
get_default_optional_params<kernel_selector::arg_max_min_optional_params>(impl_param.get_program());
argm_params.outputs_num = outputs_num;
argm_params.topK = top_k;
argm_params.argMaxMinAxis = GetArgMaxMinAxis(axis, arg.get_output_layout().get_rank());
argm_params.argMaxMinAxis = GetArgMaxMinAxis(axis, impl_param.get_output_layout().get_rank());
auto& constant_mem = impl_param.memory_deps;
if (constant_mem.count(1)) {
// topK could be obtained by reading impl_param.memory_deps.at(1).
// However, the output layout and axis already encode it, which avoids an extra mem_lock.
auto output_layout = impl_param.get_output_layout(0);
auto out_dims = output_layout.get_dims();
argm_params.topK = out_dims[axis];
} else {
argm_params.topK = top_k;
}
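
The comment above relies on a simple invariant: shape inference has already written K into the output extent along the TopK axis, so the kernel parameter can be recovered from the output dimensions without locking the K tensor again. A tiny standalone illustration (names are hypothetical):

#include <cassert>
#include <cstdint>
#include <vector>

// K equals the output extent along the TopK axis once shapes are inferred.
int64_t topk_from_output_dims(const std::vector<int64_t>& out_dims, size_t axis) {
    return out_dims[axis];
}

int main() {
    // Input [2, 3, 21, 21], TopK over axis 2 with K = 5 -> output [2, 3, 5, 21].
    const std::vector<int64_t> out_dims = {2, 3, 5, 21};
    assert(topk_from_output_dims(out_dims, 2) == 5);
}
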
if (mode == ov::op::TopKMode::MAX)
argm_params.argMaxMinOut = kernel_selector::argm_output::MAX;
@@ -86,9 +98,9 @@ public:
else
argm_params.argMaxMinSortType = kernel_selector::argm_sort::INDEX;
if (arg.has_second_output()) { // for backward compatibility
if (outputs_num == 2) { // for backward compatibility
argm_params.has_second_output = true;
if (arg.use_multiple_outputs()) {
if (primitive->input_size() != 3) {
argm_params.use_multiple_outputs = true;
argm_params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1)));
} else {
@@ -98,10 +110,7 @@ public:
argm_params.values_first = values_first;
auto& kernel_selector = kernel_selector::arg_max_min_kernel_selector::Instance();
auto best_kernel = kernel_selector.get_best_kernel(argm_params, argm_optional_params);
return make_unique<arg_max_min_impl>(best_kernel);
return {argm_params, argm_optional_params};
}
};
@@ -119,7 +128,10 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() {
format::bfzyx};
implementation_map<arg_max_min>::add(impl_types::ocl, arg_max_min_impl::create, types, formats);
implementation_map<arg_max_min>::add(impl_types::ocl,
typed_primitive_impl_ocl<arg_max_min>::create<arg_max_min_impl>,
types,
formats);
}
} // namespace detail
} // namespace ocl

View File

@@ -41,16 +41,19 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
}
return output_data_types;
};
auto topk_constant = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(1).get_node_shared_ptr());
auto argmaxPrim = cldnn::arg_max_min(layerName,
inputs,
mode,
top_k,
chosen_axis,
stype,
true,
cldnn::padding({0, 0, 0, 0}, 0),
cldnn::element_type_to_data_type(op->get_output_element_type(0)),
num_outputs);
inputs[0],
inputs[1],
mode,
(topk_constant ? top_k : 0),
chosen_axis,
stype,
true,
cldnn::padding({0, 0, 0, 0}, 0),
cldnn::element_type_to_data_type(op->get_output_element_type(0)),
num_outputs);
argmaxPrim.output_paddings = get_output_paddings();
argmaxPrim.output_data_types = get_output_data_types();
p.add_primitive(*op, argmaxPrim);
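
As a simplified model of the decision made here (plain C++ stand-ins, not the ngraph node classes): downcasting the K-producing node to a Constant succeeds only when K is known at build time; on success K is folded into the primitive, otherwise 0 is passed as the sentinel and K is resolved from the second input at runtime.

#include <cstdint>
#include <memory>

// Illustrative node types; the real code downcasts to ngraph::op::v0::Constant.
struct Node { virtual ~Node() = default; };
struct ConstantNode : Node {
    int64_t value;
    explicit ConstantNode(int64_t v) : value(v) {}
};

uint32_t top_k_for_primitive(const std::shared_ptr<Node>& k_node) {
    if (auto c = std::dynamic_pointer_cast<ConstantNode>(k_node))
        return static_cast<uint32_t>(c->value);  // constant K, fold it in
    return 0;                                    // non-constant K, defer to runtime
}
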

View File

@@ -129,4 +129,4 @@ INSTANTIATE_TEST_SUITE_P(smoke, arg_max_min_test,
},
}));
} // shape_infer_tests
} // namespace shape_infer_tests

View File

@@ -19,16 +19,16 @@ using namespace ov::test;
namespace GPULayerTestsDefinitions {
typedef std::tuple<
int64_t, // keepK
int64_t, // axis
ngraph::opset4::TopK::Mode, // mode
ngraph::opset4::TopK::SortType, // sort
ElementType, // Net precision
ElementType, // Input precision
ElementType, // Output precision
InputShape, // inputShape
TargetDevice, // Device name
std::map<std::string, std::string> // Additional network configuration
int64_t, // keepK
int64_t, // axis
ngraph::opset4::TopK::Mode, // mode
ngraph::opset4::TopK::SortType, // sort
ElementType, // Net precision
ElementType, // Input precision
ElementType, // Output precision
InputShape, // inputShape
TargetDevice, // Device name
ngraph::helpers::InputLayerType // Input type
> TopKLayerTestParamsSet;
class TopKLayerGPUTest : public testing::WithParamInterface<TopKLayerTestParamsSet>,
@@ -43,8 +43,8 @@ public:
ElementType netPrecision, inPrc, outPrc;
InputShape inputShape;
TargetDevice targetDevice;
std::map<std::string, std::string> additionalConfig;
std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, additionalConfig) = basicParamsSet;
ngraph::helpers::InputLayerType inputType;
std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, inputType) = basicParamsSet;
std::ostringstream result;
result << "k=" << keepK << "_";
@@ -58,11 +58,8 @@ public:
for (const auto& shape : inputShape.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "config=(";
for (const auto& configEntry : additionalConfig) {
result << configEntry.first << ", " << configEntry.second << ":";
}
result << ")_";
result << "inputType=" << inputType;
result << "TargetDevice=" << targetDevice;
return result.str();
@@ -77,16 +74,29 @@ protected:
ngraph::opset4::TopK::SortType sort;
ElementType inPrc, outPrc;
InputShape inputShape;
std::map<std::string, std::string> additionalConfig;
std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, additionalConfig) = basicParamsSet;
std::tie(keepK, axis, mode, sort, netPrecision, inPrc, outPrc, inputShape, targetDevice, inputType) = basicParamsSet;
init_input_shapes({inputShape});
if (inputType == ngraph::helpers::InputLayerType::CONSTANT) {
init_input_shapes({inputShape});
} else {
inputDynamicShapes = {inputShape.first, {}};
for (size_t i = 0; i < inputShape.second.size(); ++i) {
targetStaticShapes.push_back({inputShape.second[i], {}});
}
}
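
When K comes in as a Parameter, every compiled target shape has to carry a shape for the K input as well, which is what the empty {} entries above provide. A minimal sketch of that pairing (plain std containers, helper name is illustrative):

#include <cstddef>
#include <vector>

using StaticShape = std::vector<size_t>;

// Pair each data shape with a scalar (rank-0) shape for the runtime K input.
std::vector<std::vector<StaticShape>> with_scalar_k(const std::vector<StaticShape>& data_shapes) {
    std::vector<std::vector<StaticShape>> target;
    for (const auto& s : data_shapes)
        target.push_back({s, {}});  // {} is the scalar shape of the K tensor
    return target;
}
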
auto params = ngraph::builder::makeDynamicParams(netPrecision, {inputDynamicShapes[0]});
std::shared_ptr<ngraph::opset4::TopK> topk;
auto k = std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK);
topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
if (inputType == ngraph::helpers::InputLayerType::CONSTANT) {
auto k = std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK);
topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
} else {
auto k = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::Type_t::i64, inputDynamicShapes[1]);
params.push_back(k);
topk = std::dynamic_pointer_cast<ngraph::opset4::TopK>(
std::make_shared<ngraph::opset4::TopK>(params[0], k, axis, mode, sort));
}
ngraph::ResultVector results;
for (size_t i = 0; i < topk->get_output_size(); i++) {
@@ -104,60 +114,41 @@ protected:
tensor = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(), shape);
size_t size = tensor.get_size();
if (netPrecision == ElementType::f32 || netPrecision == ElementType::i32) {
if (netPrecision == ElementType::f32) {
std::vector<int> data(size);
// For int32, deliberately set big numbers which are not accurately representable in fp32
int start = netPrecision == ElementType::i32 ? pow(2, 30) + 1 : - static_cast<int>(size / 2);
int start = - static_cast<int>(size / 2);
std::iota(data.begin(), data.end(), start);
std::mt19937 gen(0);
std::shuffle(data.begin(), data.end(), gen);
if (netPrecision == ElementType::f32) {
auto *rawBlobDataPtr = static_cast<float *>(tensor.data());
for (size_t i = 0; i < size; ++i) {
rawBlobDataPtr[i] = static_cast<float>(data[i]);
}
} else {
auto *rawBlobDataPtr = static_cast<int32_t *>(tensor.data());
for (size_t i = 0; i < size; ++i) {
rawBlobDataPtr[i] = static_cast<int32_t>(data[i]);
}
}
} else if (netPrecision == ElementType::bf16) {
size_t O = 1, A = 1, I = 1;
A = shape[axis];
for (size_t i = 0; i < axis; i++)
O *= shape[i];
for (size_t i = axis + 1; i < shape.size(); i++)
I *= shape[i];
if (O * A * I != size)
FAIL() << "Incorrect blob shape " << shape;
auto *rawBlobDataPtr = static_cast<ngraph::bfloat16 *>(tensor.data());
for (size_t o = 0; o < O; o++) {
for (size_t i = 0; i < I; i++) {
std::vector<int> data(A);
int start = - static_cast<int>(A / 2);
std::iota(data.begin(), data.end(), start);
const size_t seed = (o + 1) * (i + 1);
std::mt19937 gen(seed);
std::shuffle(data.begin(), data.end(), gen);
for (size_t a = 0; a < A; a++) {
rawBlobDataPtr[o * A * I + a * I + i] = static_cast<ngraph::bfloat16>(data[a]);
}
}
auto *rawBlobDataPtr = static_cast<float *>(tensor.data());
for (size_t i = 0; i < size; ++i) {
rawBlobDataPtr[i] = static_cast<float>(data[i]);
}
} else {
FAIL() << "generate_inputs for " << netPrecision << " precision isn't supported";
}
inputs.insert({funcInputs[0].get_node_shared_ptr(), tensor});
if (inputType == ngraph::helpers::InputLayerType::PARAMETER) {
const auto& kPrecision = funcInputs[1].get_element_type();
const auto& kShape = targetInputStaticShapes[1];
const size_t startFrom = 1;
const size_t range = targetInputStaticShapes[0][axis];
const size_t seed = inferRequestNum++;
const auto kTensor = ov::test::utils::create_and_fill_tensor(kPrecision, kShape, range, startFrom, 1, seed);
inputs.insert({funcInputs[1].get_node_shared_ptr(), kTensor});
}
}
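
Sketch of the constraint the K tensor fill above enforces (std-only code, helper name is illustrative): the runtime K must lie in [1, extent of the TopK axis], and seeding from inferRequestNum keeps each inference reproducible while still letting K vary between inferences.

#include <cstdint>
#include <random>

// Pick a valid runtime K for a given target shape: 1 <= K <= axis extent.
int64_t pick_runtime_k(int64_t axis_extent, size_t seed) {
    std::mt19937 gen(static_cast<uint32_t>(seed));
    std::uniform_int_distribution<int64_t> dist(1, axis_extent);
    return dist(gen);
}
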
private:
int64_t axis;
size_t inferRequestNum = 0;
ElementType netPrecision;
bool staticShape;
ngraph::helpers::InputLayerType inputType;
};
TEST_P(TopKLayerGPUTest, CompareWithRefs) {
@@ -168,14 +159,12 @@ TEST_P(TopKLayerGPUTest, CompareWithRefs) {
namespace {
std::map<std::string, std::string> emptyAdditionalConfig;
const std::vector<ElementType> netPrecisions = {
ElementType::f32,
};
const std::vector<int64_t> axes = {0, 1, 2, 3};
const std::vector<int64_t> k = {1, 5, 7, 18, 21};
const std::vector<int64_t> axes = {0, 3};
const std::vector<int64_t> k = {3, 5, 7};
const std::vector<ngraph::opset4::TopK::Mode> modes = {
ngraph::opset4::TopK::Mode::MIN,
@@ -189,12 +178,12 @@ const std::vector<ngraph::opset4::TopK::SortType> sortTypes = {
std::vector<ov::test::InputShape> inputShapesDynamic = {
{
{{21, {20, 25}, 21, {20, 25}}, {{21, 21, 21, 21}, {21, 22, 21, 23}}},
{ov::PartialShape::dynamic(4), {{21, 21, 21, 21}, {21, 22, 21, 23}}}
{ov::PartialShape::dynamic(4), {{7, 7, 7, 7}, {7, 8, 7, 9}}},
{{-1, -1, -1, -1}, {{8, 9, 10, 11}, {11, 7, 8, 9}}}
}
};
INSTANTIATE_TEST_CASE_P(smoke_TopK_dynamic, TopKLayerGPUTest,
INSTANTIATE_TEST_CASE_P(smoke_TopK_constant_dynamic, TopKLayerGPUTest,
::testing::Combine(
::testing::ValuesIn(k),
::testing::ValuesIn(axes),
@@ -205,7 +194,21 @@ INSTANTIATE_TEST_CASE_P(smoke_TopK_dynamic, TopKLayerGPUTest,
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapesDynamic),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(emptyAdditionalConfig)),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT)),
TopKLayerGPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_TopK_parameter_dynamic, TopKLayerGPUTest,
::testing::Combine(
::testing::Values(1),
::testing::ValuesIn(axes),
::testing::ValuesIn(modes),
::testing::ValuesIn(sortTypes),
::testing::ValuesIn(netPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapesDynamic),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER)),
TopKLayerGPUTest::getTestCaseName);
} // namespace