[GPU] Implement ExperimentalDetectronTopKROIs operation (#10208)
This commit is contained in:
@@ -207,6 +207,7 @@ REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen);
|
||||
REGISTER_FACTORY(v6, MVN);
|
||||
REGISTER_FACTORY(v6, GatherElements);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronROIFeatureExtractor);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs);
|
||||
|
||||
// ------------------------------ Supported v7 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v7, Gather);
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief ExperimentalDetectronTopKROIs-6 primitive
|
||||
/// @details
|
||||
struct experimental_detectron_topk_rois : public primitive_base<experimental_detectron_topk_rois> {
|
||||
CLDNN_DECLARE_PRIMITIVE(experimental_detectron_topk_rois)
|
||||
|
||||
/**
|
||||
* Construct ExperimentalDetectronTopKROIs privitive.
|
||||
* @param id primitive id
|
||||
* @param inputs inputs parameters ids
|
||||
* @param max_rois maximal numbers of output ROIs.
|
||||
*/
|
||||
experimental_detectron_topk_rois(const primitive_id &id, const std::vector<primitive_id> &inputs,
|
||||
const size_t max_rois,
|
||||
const padding &output_padding = padding())
|
||||
: primitive_base(id, inputs, "", output_padding),
|
||||
max_rois(max_rois) {}
|
||||
|
||||
/// maximal numbers of output ROIs.
|
||||
size_t max_rois;
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <experimental_detectron_topk_rois_inst.h>
|
||||
#include "primitive_type_base.h"
|
||||
#include <sstream>
|
||||
#include <json_object.h>
|
||||
#include <data_inst.h>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// Returns the address of the single type descriptor shared by all
/// experimental_detectron_topk_rois primitives.
primitive_type_id experimental_detectron_topk_rois::type_id() {
    // Function-local static: constructed on first call, reused afterwards.
    static primitive_type_base<experimental_detectron_topk_rois> type_descriptor;
    return &type_descriptor;
}
|
||||
|
||||
/// Creates the primitive instance; all work is delegated to the parent constructor.
experimental_detectron_topk_rois_inst::typed_primitive_inst(network& network,
                                                            experimental_detectron_topk_rois_node const &node)
    : parent(network, node) {}
|
||||
|
||||
/// Computes the output layout of the primitive.
///
/// The output keeps the data type and format of input 0. Its first dimension is
/// min(first dimension of input 0, max_rois), its second dimension is copied from
/// input 0, and the remaining two dimensions are 1.
///
/// @param node Program node describing the primitive and its dependencies.
/// @return Layout of the primitive's output.
layout experimental_detectron_topk_rois_inst::calc_output_layout(experimental_detectron_topk_rois_node const &node) {
    const auto primitive = node.get_primitive();
    const auto input_layout = node.input(0).get_output_layout();
    const auto input_sizes = input_layout.size.sizes();

    // Compare in the size_t domain and narrow afterwards: casting max_rois to int32_t
    // first could wrap a large value to a negative number, which std::min would then pick.
    const auto input_rois = static_cast<size_t>(input_sizes[0]);
    const auto roi_num = static_cast<int32_t>(std::min(input_rois, primitive->max_rois));

    return {input_layout.data_type, input_layout.format, {roi_num, input_sizes[1], 1, 1}};
}
|
||||
|
||||
/// Produces a textual dump of the node: the generic node description extended with
/// the ids of both inputs and the `max_rois` attribute.
///
/// @param node Program node to describe.
/// @return The dumped description.
std::string experimental_detectron_topk_rois_inst::to_string(experimental_detectron_topk_rois_node const &node) {
    auto description = node.desc_to_json();

    // Primitive-specific section appended to the generic description.
    json_composite topk_rois_info;
    topk_rois_info.add("input id", node.input().id());
    topk_rois_info.add("indices id", node.input(1).id());
    topk_rois_info.add("max_rois", node.get_primitive()->max_rois);
    description->add("experimental detectron TopK ROIs info", topk_rois_info);

    std::stringstream dumped;
    description->dump(dumped);
    return dumped.str();
}
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <experimental_detectron_topk_rois_inst.h>
|
||||
#include <edtopkroi/experimental_detectron_topk_rois_ref.h>
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
#include <impls/implementation_map.hpp>
|
||||
#include <edtopkroi/experimental_detectron_topk_rois_kernel_selector.h>
|
||||
#include "primitive_base.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// OCL implementation of the experimental_detectron_topk_rois primitive.
struct experimental_detectron_topk_rois_impl : typed_primitive_impl_ocl<experimental_detectron_topk_rois> {
    using parent = typed_primitive_impl_ocl<experimental_detectron_topk_rois>;
    using parent::parent;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<experimental_detectron_topk_rois_impl>(*this);
    }

    /// Builds the implementation for @p arg: fills the kernel parameters, asks the
    /// kernel selector for candidates and wraps the first one.
    /// The returned raw pointer is handed to the caller.
    static primitive_impl *create(const experimental_detectron_topk_rois_node &arg) {
        auto kernel_params =
            get_default_params<kernel_selector::experimental_detectron_topk_roi_params>(arg);
        // The default params are extended with the layout of the second dependency.
        kernel_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
        kernel_params.max_rois = arg.get_primitive()->max_rois;

        const auto &selector =
            kernel_selector::experimental_detectron_topk_rois_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(
            kernel_params, kernel_selector::experimental_detectron_topk_roi_optional_params());

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new experimental_detectron_topk_rois_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers the OCL implementation factory for f16 and f32 data in bfyx format.
attach_experimental_detectron_topk_rois_impl::attach_experimental_detectron_topk_rois_impl() {
    implementation_map<experimental_detectron_topk_rois>::add(
        impl_types::ocl,
        experimental_detectron_topk_rois_impl::create,
        {
            std::make_tuple(data_types::f16, format::bfyx),
            std::make_tuple(data_types::f32, format::bfyx)
        });
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
||||
@@ -29,6 +29,7 @@ void register_implementations() {
|
||||
REGISTER_OCL(detection_output);
|
||||
REGISTER_OCL(batch_to_space);
|
||||
REGISTER_OCL(experimental_detectron_roi_feature_extractor);
|
||||
REGISTER_OCL(experimental_detectron_topk_rois);
|
||||
REGISTER_OCL(eltwise);
|
||||
REGISTER_OCL(fully_connected);
|
||||
REGISTER_OCL(gather);
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "intel_gpu/primitives/detection_output.hpp"
|
||||
#include "intel_gpu/primitives/eltwise.hpp"
|
||||
#include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp"
|
||||
#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
|
||||
#include "intel_gpu/primitives/fully_connected.hpp"
|
||||
#include "intel_gpu/primitives/gather.hpp"
|
||||
#include "intel_gpu/primitives/gather_nd.hpp"
|
||||
@@ -99,6 +100,7 @@ REGISTER_OCL(deformable_interp);
|
||||
REGISTER_OCL(depth_to_space);
|
||||
REGISTER_OCL(detection_output);
|
||||
REGISTER_OCL(experimental_detectron_roi_feature_extractor);
|
||||
REGISTER_OCL(experimental_detectron_topk_rois);
|
||||
REGISTER_OCL(eltwise);
|
||||
REGISTER_OCL(embed);
|
||||
REGISTER_OCL(fully_connected);
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
template<>
|
||||
struct typed_program_node<experimental_detectron_topk_rois> : public typed_program_node_base<experimental_detectron_topk_rois> {
|
||||
using parent = typed_program_node_base<experimental_detectron_topk_rois>;
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
const program_node &input(std::size_t index = 0) const { return get_dependency(index); }
|
||||
};
|
||||
|
||||
using experimental_detectron_topk_rois_node = typed_program_node<experimental_detectron_topk_rois>;
|
||||
|
||||
template<>
|
||||
class typed_primitive_inst<experimental_detectron_topk_rois> : public typed_primitive_inst_base<experimental_detectron_topk_rois> {
|
||||
using parent = typed_primitive_inst_base<experimental_detectron_topk_rois>;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(experimental_detectron_topk_rois_node const &node);
|
||||
|
||||
static std::string to_string(experimental_detectron_topk_rois_node const &node);
|
||||
|
||||
public:
|
||||
typed_primitive_inst(network &network, experimental_detectron_topk_rois_node const &desc);
|
||||
};
|
||||
|
||||
using experimental_detectron_topk_rois_inst = typed_primitive_inst<experimental_detectron_topk_rois>;
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -76,6 +76,7 @@ enum class KernelType {
|
||||
NON_MAX_SUPPRESSION,
|
||||
DETECTION_OUTPUT,
|
||||
EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR,
|
||||
EXPERIMENTAL_DETECTRON_TOPK_ROIS,
|
||||
CONVERT_COLOR,
|
||||
RANDOM_UNIFORM
|
||||
};
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "experimental_detectron_topk_rois_kernel_selector.h"
|
||||
#include "experimental_detectron_topk_rois_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
|
||||
/// Returns the process-wide selector, constructed on first use.
experimental_detectron_topk_rois_kernel_selector &experimental_detectron_topk_rois_kernel_selector::Instance() {
    static experimental_detectron_topk_rois_kernel_selector selector;
    return selector;
}
|
||||
|
||||
/// Attaches the reference kernel, the only implementation this selector offers.
experimental_detectron_topk_rois_kernel_selector::experimental_detectron_topk_rois_kernel_selector() {
    Attach<ExperimentalDetectronTopKROIRef>();
}
|
||||
|
||||
/// Selects kernels for the given parameters by delegating to GetNaiveBestKernel
/// with the EXPERIMENTAL_DETECTRON_TOPK_ROIS kernel type.
///
/// @param params Kernel parameters.
/// @param options Optional kernel parameters.
/// @return Whatever GetNaiveBestKernel returns for these arguments.
KernelsData experimental_detectron_topk_rois_kernel_selector::GetBestKernels(const Params &params,
                                                                             const optional_params &options) const {
    return GetNaiveBestKernel(params, options, KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS);
}
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
/**
|
||||
* GPU kernel selector for the ExperimentalDetectronTopKROIS-6 operation
|
||||
*/
|
||||
class experimental_detectron_topk_rois_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static experimental_detectron_topk_rois_kernel_selector &Instance();
|
||||
|
||||
experimental_detectron_topk_rois_kernel_selector();
|
||||
|
||||
KernelsData GetBestKernels(const Params ¶ms, const optional_params &options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,87 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#include "experimental_detectron_topk_rois_ref.h"
|
||||
|
||||
#include <kernel_selector_utils.h>
|
||||
#include <random>
|
||||
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
namespace {
|
||||
|
||||
|
||||
/// Builds the dispatch configuration for the reference kernel.
///
/// The global work size is {output batch, 1, 1}; the local work size is obtained
/// from GetOptimalLocalWorkGroupSizes for that global size and the engine info.
///
/// @param params Kernel parameters (output tensor and engine info are read).
/// @return The filled dispatch data.
CommonDispatchData SetDefault(const experimental_detectron_topk_roi_params &params, const optional_params &) {
    CommonDispatchData dispatch;
    dispatch.gws = {params.output.Batch().v, 1, 1};
    dispatch.lws = GetOptimalLocalWorkGroupSizes(dispatch.gws, params.engineInfo);
    return dispatch;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Returns the JIT constants for the kernel; only the base-params constants are produced.
///
/// @param params Kernel parameters.
JitConstants ExperimentalDetectronTopKROIRef::GetJitConstants(const experimental_detectron_topk_roi_params &params) const {
    return MakeBaseParamsJitConstants(params);
}
|
||||
|
||||
|
||||
/// Prepares the kernel data for the reference kernel.
///
/// @param params Kernel parameters; must pass Validate().
/// @param options Optional kernel parameters.
/// @return A single-element list with the prepared kernel data, or an empty list
///         when validation fails.
KernelsData ExperimentalDetectronTopKROIRef::GetKernelsData(const Params &params, const optional_params &options) const {
    if (!Validate(params, options)) {
        return {};
    }

    KernelData kernel_data = KernelData::Default<experimental_detectron_topk_roi_params>(params);
    // Work on the parameter copy owned by kernel_data rather than on the caller's object.
    const auto &new_params = dynamic_cast<const experimental_detectron_topk_roi_params &>(*kernel_data.params);

    const auto dispatch_data = SetDefault(new_params, options);
    const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options);
    const auto jit_constants = GetJitConstants(new_params);
    const auto jit = CreateJit(kernelName, jit_constants, entry_point);

    // NOTE(review): the trailing 2 presumably is the number of kernel inputs — confirm
    // against FillCLKernelData's signature.
    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, kernelName, jit, entry_point, "", false,
                     false, 2);

    KernelsData kernels_data;
    kernels_data.push_back(std::move(kernel_data));
    return kernels_data;
}
|
||||
|
||||
/// Returns the fixed priority of this kernel; both arguments are ignored.
KernelsPriority ExperimentalDetectronTopKROIRef::GetKernelsPriority(const Params & /*params*/,
                                                                    const optional_params & /*options*/) const {
    return FORCE_PRIORITY_1;
}
|
||||
|
||||
/// Describes what the kernel supports: F16/F32/INT32/INT64 inputs, F16/F32 outputs,
/// differing input/output types, bfyx layouts on both sides, and batching.
ParamsKey ExperimentalDetectronTopKROIRef::GetSupportedKey() const {
    ParamsKey key;

    for (const auto input_type : {Datatype::F16, Datatype::F32, Datatype::INT32, Datatype::INT64}) {
        key.EnableInputDataType(input_type);
    }
    for (const auto output_type : {Datatype::F16, Datatype::F32}) {
        key.EnableOutputDataType(output_type);
    }

    key.EnableDifferentTypes();
    key.EnableOutputLayout(Tensor::bfyx);
    key.EnableInputLayout(Tensor::bfyx);
    key.EnableBatching();
    return key;
}
|
||||
|
||||
bool ExperimentalDetectronTopKROIRef::Validate(const Params ¶ms, const optional_params &optionalParams) const {
|
||||
if (params.GetType() != KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS ||
|
||||
optionalParams.GetType() != KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const experimental_detectron_topk_roi_params &new_params = dynamic_cast<const experimental_detectron_topk_roi_params &>(params);
|
||||
if (new_params.inputs.size() != 2) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
/**
|
||||
* ExperimentalDetectronTopKROIs kernel params.
|
||||
*/
|
||||
struct experimental_detectron_topk_roi_params : public base_params {
|
||||
experimental_detectron_topk_roi_params() : base_params(KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {}
|
||||
|
||||
size_t max_rois = 0; // maximal numbers of output ROIs.
|
||||
};
|
||||
|
||||
struct experimental_detectron_topk_roi_optional_params : public optional_params {
|
||||
experimental_detectron_topk_roi_optional_params() : optional_params(KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* Reference GPU kernel for the ExperimentalDetectronTopKROIs-6 operation to set output by indices sorted before.
|
||||
*/
|
||||
class ExperimentalDetectronTopKROIRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
ExperimentalDetectronTopKROIRef() : KernelBaseOpenCL("experimental_detectron_topk_rois_ref") {}
|
||||
|
||||
private:
|
||||
virtual JitConstants GetJitConstants(const experimental_detectron_topk_roi_params ¶ms) const;
|
||||
|
||||
KernelsData GetKernelsData(const Params ¶ms, const optional_params &options) const override;
|
||||
|
||||
KernelsPriority GetKernelsPriority(const Params ¶ms, const optional_params &options) const override;
|
||||
|
||||
bool Validate(const Params ¶ms, const optional_params &optionalParams) const override;
|
||||
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
@@ -0,0 +1,19 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
// Copies one ROI per work-item: work-item b reads the index stored at topk_indices[b]
// and copies four consecutive elements of that input row into output row b.
// NOTE(review): the "+ i" addressing assumes the four ROI values are contiguous in
// both buffers — confirm for padded layouts.
KERNEL(experimental_detectron_topk_rois_ref)(const __global INPUT0_TYPE* input_rois,
                                             const __global INPUT1_TYPE* topk_indices,
                                             __global OUTPUT_TYPE* output_rois)
{
    const uint b = get_global_id(0);
    const uint dst = OUTPUT_GET_INDEX(b, 0, 0, 0);
    const uint roi_idx = topk_indices[b];
    const uint src = INPUT0_GET_INDEX(roi_idx, 0, 0, 0);

    for (uint i = 0; i < 4; ++i) {
        output_rois[dst + i] = input_rois[src + i];
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/experimental_detectron_topkrois.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
|
||||
#include "intel_gpu/primitives/arg_max_min.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
/// Translates an ExperimentalDetectronTopKROIs-6 op into two GPU primitives:
/// an arg_max_min over the second input that yields i32 indices of the `max_rois`
/// largest values, followed by experimental_detectron_topk_rois consuming the first
/// input together with those indices.
///
/// @param p Program the primitives are added to.
/// @param op The operation being translated; must have exactly two inputs.
void CreateExperimentalDetectronTopKROIsOp(Program &p,
                                           const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronTopKROIs> &op) {
    p.ValidateInputs(op, {2});

    auto input_ids = p.GetInputPrimitiveIDs(op);
    auto max_rois = op->get_max_rois();
    auto layer_name = layer_type_name_ID(op);
    auto indices_layer_name = layer_name + "_topk";

    // Step 1: indices of the top values of the second input.
    auto indices_prim = arg_max_min(indices_layer_name,
                                    {input_ids[1]},
                                    arg_max_min::max,
                                    max_rois,
                                    arg_max_min::batch,
                                    arg_max_min::sort_by_values,
                                    false,
                                    "",
                                    cldnn::padding(),
                                    cldnn::data_types::i32);
    p.AddPrimitive(indices_prim);
    p.AddInnerPrimitiveToProfiler(indices_prim, indices_layer_name, op);

    // Step 2: pick rows of the first input by those indices.
    auto topk_rois_prim = cldnn::experimental_detectron_topk_rois(layer_name,
                                                                  {input_ids[0], indices_layer_name},
                                                                  max_rois);
    p.AddPrimitive(topk_rois_prim);
    p.AddPrimitiveToProfiler(topk_rois_prim, op);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(v6, ExperimentalDetectronTopKROIs);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
@@ -965,3 +965,51 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
|
||||
EXPECT_EQ(out_buffer[i], ref_vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Runs arg_max_min (max, top 5 along x, sorted by values, i32 output) on ten distinct
// probabilities and checks that the returned indices follow descending value order.
TEST(top_k_layer_tests, sort_probabilities_by_indices) {
    static const int32_t x_size = 10, y_size = 1, feature_num = 1, batch_num = 1;
    const int top_k = 5;

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch_num, feature_num, x_size, y_size } });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::x,
                             arg_max_min::sort_by_values, false, "", padding(), data_types::i32));

    std::vector<float> input_vec = { 0.9f, 0.1f, 0.2f, 0.8f, 0.5f, 0.6f, 0.3f, 0.4f, 0.7f, 0.95f };
    std::vector<int> ref_vec = { 9, 0, 3, 8, 5 };

    set_values(input, input_vec);

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "arg_max");

    auto output = outputs.at("arg_max").get_memory();
    cldnn::mem_lock<int> output_ptr(output, get_test_stream());

    // Snapshot the first top_k values, then compare them with the reference.
    std::vector<int> out_buffer(top_k);
    for (uint32_t i = 0; i < out_buffer.size(); i++) {
        out_buffer[i] = get_value<int>(output_ptr.data(), i);
    }
    for (size_t i = 0; i < out_buffer.size(); i++) {
        EXPECT_EQ(out_buffer[i], ref_vec[i]);
    }
}
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
// Copyright (C) 2021-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/activation.hpp>
|
||||
#include <intel_gpu/primitives/experimental_detectron_topk_rois.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
// Four input ROIs, indices {3, 1}, max_rois = 2: the output must be rows 3 and 1
// of the input, in that order.
TEST(experimental_detectron_topk_rois_gpu_fp32, check_set_indices_layer) {
    auto &engine = get_test_engine();

    const int rois_num = 2;

    auto roi_input = engine.allocate_memory({data_types::f32, format::bfyx, tensor(batch(4), feature(4))});
    auto roi_indices = engine.allocate_memory({data_types::i32, format::bfyx, tensor(batch(rois_num), feature(1))});

    std::vector<float> rois{1.0f,  1.0f,  4.0f,  5.0f,
                            3.0f,  2.0f,  7.0f,  9.0f,
                            10.0f, 15.0f, 13.0f, 17.0f,
                            13.0f, 10.0f, 18.0f, 15.0f};
    set_values(roi_input, rois);
    set_values(roi_indices, {3, 1});

    const std::string input_rois_id = "InputRois";
    const std::string input_indices_id = "InputIndices";
    const std::string experimental_detectron_topk_rois_id = "experimental_detectron_topk_rois";

    topology topology;
    topology.add(input_layout(input_rois_id, roi_input->get_layout()));
    topology.add(input_layout(input_indices_id, roi_indices->get_layout()));
    topology.add(experimental_detectron_topk_rois(experimental_detectron_topk_rois_id,
                                                  {input_rois_id, input_indices_id},
                                                  rois_num));

    network network(engine, topology);
    network.set_input_data(input_rois_id, roi_input);
    network.set_input_data(input_indices_id, roi_indices);

    auto result = network.execute();

    std::vector<float> expected_output{13.0f, 10.0f, 18.0f, 15.0f,
                                       3.0f,  2.0f,  7.0f,  9.0f};

    auto out_mem = result.at(experimental_detectron_topk_rois_id).get_memory();
    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());

    ASSERT_EQ(expected_output.size(), out_ptr.size());
    for (size_t i = 0; i < expected_output.size(); ++i) {
        EXPECT_NEAR(expected_output[i], out_ptr[i], 0.0001) << "at i = " << i;
    }
}
|
||||
|
||||
// max_rois (3) exceeds the number of input ROIs (2): the output is expected to hold
// just the two input rows, reordered by the indices {1, 0}.
TEST(experimental_detectron_topk_rois_gpu_fp32, check_set_indices_layer_model_less_than_k) {
    auto &engine = get_test_engine();

    // Requested top-k is larger than the number of ROIs in the input.
    const int rois_num = 3;

    auto roi_input = engine.allocate_memory({data_types::f32, format::bfyx, tensor(batch(2), feature(4))});
    auto roi_indices = engine.allocate_memory({data_types::i32, format::bfyx, tensor(batch(2), feature(1))});

    std::vector<float> rois{1.0f, 1.0f, 4.0f, 5.0f,
                            3.0f, 2.0f, 7.0f, 9.0f};
    set_values(roi_input, rois);
    set_values(roi_indices, {1, 0});

    const std::string input_rois_id = "InputRois";
    const std::string input_indices_id = "InputIndices";
    const std::string experimental_detectron_topk_rois_id = "experimental_detectron_topk_rois";

    topology topology;
    topology.add(input_layout(input_rois_id, roi_input->get_layout()));
    topology.add(input_layout(input_indices_id, roi_indices->get_layout()));
    topology.add(experimental_detectron_topk_rois(experimental_detectron_topk_rois_id,
                                                  {input_rois_id, input_indices_id},
                                                  rois_num));

    network network(engine, topology);
    network.set_input_data(input_rois_id, roi_input);
    network.set_input_data(input_indices_id, roi_indices);

    auto result = network.execute();

    std::vector<float> expected_output{3.0f, 2.0f, 7.0f, 9.0f,
                                       1.0f, 1.0f, 4.0f, 5.0f};

    auto out_mem = result.at(experimental_detectron_topk_rois_id).get_memory();
    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());

    ASSERT_EQ(expected_output.size(), out_ptr.size());
    for (size_t i = 0; i < expected_output.size(); ++i) {
        EXPECT_NEAR(expected_output[i], out_ptr[i], 0.0001) << "at i = " << i;
    }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include "single_layer_tests/experimental_detectron_topkrois.hpp"
|
||||
|
||||
using namespace ov::test;
|
||||
using namespace ov::test::subgraph;
|
||||
|
||||
namespace {
|
||||
std::vector<int64_t> maxRois {
|
||||
1000,
|
||||
1500,
|
||||
2000
|
||||
};
|
||||
|
||||
std::vector<ElementType> elementTypes {
|
||||
ElementType::f16,
|
||||
ElementType::f32
|
||||
};
|
||||
|
||||
const std::vector<std::vector<InputShape>> staticInputShape = {
|
||||
static_shapes_to_test_representation({{3000, 4}, {3000}}),
|
||||
static_shapes_to_test_representation({{4200, 4}, {4200}}),
|
||||
static_shapes_to_test_representation({{4500, 4}, {4500}})
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_ExperimentalDetectronTopKROIs_static, ExperimentalDetectronTopKROIsLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(staticInputShape),
|
||||
::testing::ValuesIn(maxRois),
|
||||
::testing::ValuesIn(elementTypes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
ExperimentalDetectronTopKROIsLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <shared_test_classes/base/ov_subgraph.hpp>
|
||||
#include "ngraph/ops.hpp"
|
||||
|
||||
#include "functional_test_utils/ov_tensor_utils.hpp"
|
||||
@@ -628,6 +629,42 @@ ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v5::NonMaxSuppres
|
||||
}
|
||||
}
|
||||
|
||||
template<ov::element::Type_t elemType>
|
||||
ov::runtime::Tensor generate_unique_possibilities(const ov::Shape &targetShape) {
|
||||
using value_type = typename element_type_traits<elemType>::value_type;
|
||||
ov::runtime::Tensor tensor = ov::runtime::Tensor(elemType, targetShape);
|
||||
const size_t k = targetShape[0];
|
||||
std::vector<size_t> indices(k);
|
||||
std::iota(indices.begin(), indices.end(), 0lu);
|
||||
std::default_random_engine random;
|
||||
std::shuffle(indices.begin(), indices.end(), random);
|
||||
|
||||
auto dataPtr = tensor.data<value_type>();
|
||||
for (size_t i = 0; i < k; ++i) {
|
||||
// our goal is to have unique values for both f32 and f16 to avoid false failures because of the same possibilities
|
||||
dataPtr[i] = ov::float16::from_bits(static_cast< uint16_t>(indices[i]));
|
||||
}
|
||||
return tensor;
|
||||
}
|
||||
|
||||
// Input generator for ExperimentalDetectronTopKROIs. Port 1 receives unique values
// (f16 or f32 only; any other element type is reported through OPENVINO_UNREACHABLE).
// Every other port falls back to the generic ov::Node generator.
ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronTopKROIs>& node,
                             size_t port,
                             const ov::element::Type& elemType,
                             const ov::Shape& targetShape) {
    const bool is_probabilities_port = (port == 1);
    if (is_probabilities_port) {
        switch (elemType) {
        case element::Type_t::f16:
            return generate_unique_possibilities<element::Type_t::f16>(targetShape);
        case element::Type_t::f32:
            return generate_unique_possibilities<element::Type_t::f32>(targetShape);
        default:
            OPENVINO_UNREACHABLE("Unsupported element type: ", elemType);
        }
    }

    return generate(std::dynamic_pointer_cast<ov::Node>(node), port, elemType, targetShape);
}
|
||||
|
||||
|
||||
ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v5::RNNSequence>& node,
|
||||
size_t port,
|
||||
const ov::element::Type& elemType,
|
||||
|
||||
Reference in New Issue
Block a user