From ed65d199bb3365c9bb296ce1ec1c1acda81b096a Mon Sep 17 00:00:00 2001 From: Konstantin Beluchenko Date: Thu, 21 Jul 2022 12:52:32 +0300 Subject: [PATCH] [GPU] ROIAlign v9 support (#11899) * ROIAlign v9 support * Code changes after review1 * Code changes after review2 * fix of single layer test for Windows * Since PR #12043 we don't need strong include order of primitive_base.hpp and impls/implementation map.hpp anymore * Code changes after review3 * Code changes after review4 --- .../intel_gpu/plugin/primitives_list.hpp | 1 + .../intel_gpu/primitives/roi_align.hpp | 31 +-- .../src/graph/impls/implementation_map.hpp | 16 ++ .../src/graph/impls/ocl/roi_align.cpp | 52 +++-- src/plugins/intel_gpu/src/graph/program.cpp | 7 +- src/plugins/intel_gpu/src/graph/roi_align.cpp | 27 ++- .../roi_align/roi_align_kernel_ref.cpp | 35 +++- .../roi_align/roi_align_kernel_ref.h | 7 +- .../core/cl_kernels/roi_align_ref.cl | 116 ++++++------ .../intel_gpu/src/plugin/ops/roi_align.cpp | 52 ++++- .../tests/test_cases/roi_align_gpu_test.cpp | 178 ++++++++++++++++++ .../single_layer_tests/roi_align.cpp | 122 ++++++++---- .../include/single_layer_tests/roi_align.hpp | 4 + .../single_layer/roi_align.hpp | 28 +++ .../src/single_layer/roi_align.cpp | 132 +++++++++++-- 15 files changed, 639 insertions(+), 169 deletions(-) create mode 100644 src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index caea06ef284..5383be3c67c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -235,6 +235,7 @@ REGISTER_FACTORY(v8, AdaptiveMaxPool); // ------------------------------ Supported v9 ops ------------------------------ // REGISTER_FACTORY(v9, SoftSign) +REGISTER_FACTORY(v9, ROIAlign); // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/roi_align.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/roi_align.hpp index dfeb33224cb..a52011b5881 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/roi_align.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/roi_align.hpp @@ -19,10 +19,10 @@ struct roi_align : public primitive_base { CLDNN_DECLARE_PRIMITIVE(roi_align) /// @brief Pooling mode for the @ref roi_align - enum PoolingMode { - Max, - Avg - }; + enum PoolingMode { max, avg }; + + /// @brief Aligned mode for the @ref roi_align + enum AlignedMode { asymmetric, half_pixel_for_nn, half_pixel }; /// @brief Constructs roi_align primitive. /// @param id This primitive id. @@ -32,24 +32,25 @@ struct roi_align : public primitive_base { /// @param sampling_ratio Number of bins over height and width to use to calculate each output feature map element. /// @param spatial_scale multiplicative spatial scale factor to translate ROI coordinates /// from their input spatial scale to the scale used when pooling. - /// @param mode Method to perform pooling to produce output feature map elements. - /// @param shrink_axis_mask Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i]. + /// @param pooling_mode Method to perform pooling to produce output feature map elements. + /// @param aligned_mode Method to coordinates alignment. roi_align(const primitive_id& id, const std::vector& inputs, int pooled_h, int pooled_w, int sampling_ratio, float spatial_scale, - PoolingMode mode, + PoolingMode pooling_mode, + AlignedMode aligned_mode, const primitive_id& ext_prim_id = "", const padding& output_padding = padding()) : primitive_base(id, inputs, ext_prim_id, output_padding), - pooled_h {pooled_h}, - pooled_w {pooled_w}, - sampling_ratio {sampling_ratio}, - spatial_scale {spatial_scale}, - mode {mode} - {} + pooled_h{pooled_h}, + pooled_w{pooled_w}, + sampling_ratio{sampling_ratio}, + spatial_scale{spatial_scale}, + pooling_mode{pooling_mode}, + aligned_mode{aligned_mode} {} /// @brief Height of the ROI output feature map. int pooled_h; @@ -61,7 +62,9 @@ struct roi_align : public primitive_base { /// from their input spatial scale to the scale used when pooling. float spatial_scale; /// @brief Method to perform pooling to produce output feature map elements. - PoolingMode mode; + PoolingMode pooling_mode; + /// @brief Method to coordinate alignment. + AlignedMode aligned_mode; }; /// @} /// @} diff --git a/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp b/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp index 1aeab6be1ab..ed23803e323 100644 --- a/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/implementation_map.hpp @@ -204,11 +204,27 @@ public: return false; } + static void add(impl_types impl_type, factory_type factory, + const std::vector& types, const std::vector& formats) { + add(impl_type, factory, combine(types, formats)); + } + static void add(impl_types impl_type, factory_type factory, std::set keys) { if (impl_type == impl_types::any) { throw std::runtime_error("[CLDNN] Can't register impl with type any"); } map_type::instance().insert({impl_type, {keys, factory}}); } + +private: + static std::set combine(const std::vector& types, const std::vector& formats) { + std::set keys; + for (const auto& type : types) { + for (const auto& format : formats) { + keys.emplace(type, format); + } + } + return keys; + } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp index 8c82fc62428..19b574383ce 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/roi_align.cpp @@ -1,13 +1,14 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "roi_align_inst.h" + #include "primitive_base.hpp" #include "impls/implementation_map.hpp" #include "intel_gpu/runtime/error_handler.hpp" #include "kernel_selector_helper.h" -#include "roi_align/roi_align_kernel_selector.h" #include "roi_align/roi_align_kernel_ref.h" +#include "roi_align/roi_align_kernel_selector.h" +#include "roi_align_inst.h" namespace cldnn { namespace ocl { @@ -15,11 +16,23 @@ namespace ocl { namespace { kernel_selector::pool_type from(roi_align::PoolingMode mode) { switch (mode) { - case roi_align::PoolingMode::Max: - return kernel_selector::pool_type::MAX; - default: - case roi_align::PoolingMode::Avg: - return kernel_selector::pool_type::AVG; + case roi_align::PoolingMode::max: + return kernel_selector::pool_type::MAX; + default: + case roi_align::PoolingMode::avg: + return kernel_selector::pool_type::AVG; + } +} + +kernel_selector::roi_aligned_mode from(roi_align::AlignedMode mode) { + switch (mode) { + case roi_align::AlignedMode::half_pixel_for_nn: + return kernel_selector::roi_aligned_mode::HALF_PIXEL_FOR_NN; + case roi_align::AlignedMode::half_pixel: + return kernel_selector::roi_aligned_mode::HALF_PIXEL; + default: + case roi_align::AlignedMode::asymmetric: + return kernel_selector::roi_aligned_mode::ASYMMETRIC; } } } // namespace @@ -35,8 +48,8 @@ struct roi_align_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { kernel_arguments_data args; - args.inputs = { instance.input_memory_ptr(), instance.rois_memory(), instance.batches_memory() }; - args.outputs = { instance.output_memory_ptr() }; + args.inputs = {instance.input_memory_ptr(), instance.rois_memory(), instance.batches_memory()}; + args.outputs = {instance.output_memory_ptr()}; return args; } @@ -67,10 +80,10 @@ public: auto roi_align_optional_params = get_default_optional_params(arg.get_program()); - const auto roi_bfyx = convert_data_tensor(rois_layout); - roi_align_params.inputs.push_back(roi_bfyx.FlattenFeatureAndSpatials()); + roi_align_params.inputs.push_back(convert_data_tensor(rois_layout)); roi_align_params.inputs.push_back(convert_data_tensor(batches_layout)); - roi_align_params.mode = from(primitive->mode); + roi_align_params.pooling_mode = from(primitive->pooling_mode); + roi_align_params.aligned_mode = from(primitive->aligned_mode); roi_align_params.sampling_ratio = primitive->sampling_ratio; roi_align_params.spatial_scale = primitive->spatial_scale; @@ -91,11 +104,16 @@ public: namespace detail { attach_roi_align_impl::attach_roi_align_impl() { - implementation_map::add(impl_types::ocl, roi_align_impl::create, - { - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::f32, format::bfyx), - }); + auto types = {data_types::f16, data_types::f32, data_types::i8, data_types::u8, data_types::i32}; + + auto formats = {format::bfyx, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::bs_fs_yx_bsv16_fsv16, + format::bs_fs_yx_bsv32_fsv16, + format::bs_fs_yx_bsv32_fsv32}; + + implementation_map::add(impl_types::ocl, roi_align_impl::create, types, formats); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 4b302428fc7..a3818506304 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1400,7 +1400,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::mvn::type_id() && prim.type() != cldnn::gather::type_id() && prim.type() != cldnn::scatter_nd_update::type_id() && - prim.type() != cldnn::non_max_suppression::type_id()) { + prim.type() != cldnn::non_max_suppression::type_id() && + prim.type() != cldnn::roi_align::type_id()) { can_use_fsv16 = false; } @@ -1428,8 +1429,10 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::generic_layer::type_id() && prim.type() != cldnn::scatter_nd_update::type_id() && prim.type() != cldnn::quantize::type_id() && - prim.type() != cldnn::non_max_suppression::type_id()) + prim.type() != cldnn::non_max_suppression::type_id() && + prim.type() != cldnn::roi_align::type_id()) { can_use_bs_fs_yx_bsv16_fsv16 = false; + } } size_t total_conv_layers = lo.get_total_conv_count(); diff --git a/src/plugins/intel_gpu/src/graph/roi_align.cpp b/src/plugins/intel_gpu/src/graph/roi_align.cpp index d89c5de53cb..61375c10f94 100644 --- a/src/plugins/intel_gpu/src/graph/roi_align.cpp +++ b/src/plugins/intel_gpu/src/graph/roi_align.cpp @@ -6,6 +6,7 @@ #include "primitive_type_base.h" #include #include +#include "openvino/core/enum_names.hpp" namespace cldnn { @@ -23,7 +24,9 @@ layout roi_align_inst::calc_output_layout(roi_align_node const& node) { auto rois_layout = node.input(1).get_output_layout(); auto num_rois = rois_layout.batch(); auto num_channels = input_layout.feature(); - return layout(input_layout.data_type, format::bfyx, {num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}); + return layout(input_layout.data_type, + input_layout.format, + {num_rois, num_channels, primitive->pooled_h, primitive->pooled_w}); } std::string roi_align_inst::to_string(roi_align_node const& node) { @@ -36,11 +39,29 @@ std::string roi_align_inst::to_string(roi_align_node const& node) { roi_align_info.add("pooled_w", node.get_primitive()->pooled_w); roi_align_info.add("sampling_ratio", node.get_primitive()->sampling_ratio); roi_align_info.add("spatial_scale", node.get_primitive()->spatial_scale); - roi_align_info.add("mode", node.get_primitive()->mode == roi_align::PoolingMode::Max ? "Max" : "Avg"); + roi_align_info.add("pooling_mode", ov::as_string(node.get_primitive()->pooling_mode)); + roi_align_info.add("aligned_mode", ov::as_string(node.get_primitive()->aligned_mode)); node_info->add("roi_align info", roi_align_info); std::stringstream primitive_description; node_info->dump(primitive_description); return primitive_description.str(); } -} // namespace cldnn +} // namespace cldnn + +namespace ov { +template <> EnumNames& EnumNames::get() { + static auto enum_names = + EnumNames("PoolingMode", {{"max", roi_align::PoolingMode::max}, + {"avg", roi_align::PoolingMode::avg}}); + return enum_names; +} + +template <> EnumNames& EnumNames::get() { + static auto enum_names = + EnumNames("AlignedMode", {{"asymmetric", roi_align::AlignedMode::asymmetric}, + {"half_pixel_for_nn", roi_align::AlignedMode::half_pixel_for_nn}, + {"half_pixel", roi_align::AlignedMode::half_pixel}}); + return enum_names; +} +} // namespace ov diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.cpp index 61c503bac1e..aaab0910be7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.cpp @@ -10,11 +10,13 @@ ParamsKey ROIAlignKernelRef::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::UINT8); + k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::INT32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfyx); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); @@ -70,21 +72,36 @@ bool ROIAlignKernelRef::Validate(const Params& p, const optional_params& o) cons if (params.inputs.size() != 3) return false; - if (params.outputs[0].Dimentions() > 4 || params.inputs[0].Dimentions() > 4 || params.inputs[1].Dimentions() > 2) - return false; - return true; } -JitConstants ROIAlignKernelRef::GetJitConstants(const roi_align_params ¶ms) const { +JitConstants ROIAlignKernelRef::GetJitConstants(const roi_align_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); + jit.AddConstant(MakeJitConstant("SPATIAL_SCALE", params.spatial_scale)); jit.AddConstant(MakeJitConstant("SAMPLING_RATIO", params.sampling_ratio)); - if (params.mode == PoolType::MAX) + + if (params.pooling_mode == PoolType::MAX) { jit.AddConstant(MakeJitConstant("MAX_POOL", true)); - else if (params.mode == PoolType::AVG) + } else if (params.pooling_mode == PoolType::AVG) { jit.AddConstant(MakeJitConstant("AVG_POOL", true)); + } + + if (params.aligned_mode == roi_aligned_mode::ASYMMETRIC) { + jit.AddConstant(MakeJitConstant("OFFSET_SRC", 0.f)); + jit.AddConstant(MakeJitConstant("OFFSET_DST", 0.f)); + jit.AddConstant(MakeJitConstant("MIN_SIZE", 1.0f)); + } else if (params.aligned_mode == roi_aligned_mode::HALF_PIXEL_FOR_NN) { + jit.AddConstant(MakeJitConstant("OFFSET_SRC", 0.f)); + jit.AddConstant(MakeJitConstant("OFFSET_DST", -0.5f)); + jit.AddConstant(MakeJitConstant("MIN_SIZE", 0.f)); + } else if (params.aligned_mode == roi_aligned_mode::HALF_PIXEL) { + jit.AddConstant(MakeJitConstant("OFFSET_SRC", 0.5f)); + jit.AddConstant(MakeJitConstant("OFFSET_DST", -0.5f)); + jit.AddConstant(MakeJitConstant("MIN_SIZE", 0.f)); + } + return jit; } -} // namespace kernel_selector +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.h index e719740afb5..310dbfe13f7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/roi_align/roi_align_kernel_ref.h @@ -7,16 +7,19 @@ namespace kernel_selector { +enum class roi_aligned_mode : uint32_t { ASYMMETRIC, HALF_PIXEL_FOR_NN, HALF_PIXEL }; + struct roi_align_params : public base_params { roi_align_params() : base_params{KernelType::ROI_ALIGN} {} int sampling_ratio = 0; float spatial_scale = 1.f; - PoolType mode = PoolType::MAX; + PoolType pooling_mode = PoolType::MAX; + roi_aligned_mode aligned_mode = roi_aligned_mode::ASYMMETRIC; ParamsKey GetParamsKey() const override { auto k = base_params::GetParamsKey(); - k.EnablePoolType(mode); + k.EnablePoolType(pooling_mode); return k; } }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/roi_align_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/roi_align_ref.cl index 1db3dc55cd8..234f9766f2d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/roi_align_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/roi_align_ref.cl @@ -5,99 +5,103 @@ #include "include/batch_headers/common.cl" #include "include/batch_headers/data_types.cl" -#define MAX(a,b) ((a) > (b) ? (a) : (b)) -#define NUM_ROIS OUTPUT_BATCH_NUM -#define NUM_CHANNELS INPUT0_FEATURE_NUM -#define POOLED_WIDTH OUTPUT_SIZE_X +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define NUM_ROIS OUTPUT_BATCH_NUM +#define NUM_CHANNELS INPUT0_FEATURE_NUM +#define POOLED_WIDTH OUTPUT_SIZE_X #define POOLED_HEIGHT OUTPUT_SIZE_Y KERNEL(roi_align_ref) -( - const __global INPUT0_TYPE * src_data, - __global OUTPUT_TYPE * dst_data, - const __global INPUT1_TYPE * src_rois, - const __global INPUT2_TYPE * src_batches -) -{ +(const __global INPUT0_TYPE* src_data, + __global OUTPUT_TYPE* dst_data, + const __global INPUT1_TYPE* src_rois, + const __global INPUT2_TYPE* src_batches) { const size_t i = get_global_id(0); const uint x = i % POOLED_WIDTH; const uint y = i / POOLED_WIDTH % POOLED_HEIGHT; const uint c = i / POOLED_WIDTH / POOLED_HEIGHT % NUM_CHANNELS; const uint r = i / POOLED_WIDTH / POOLED_HEIGHT / NUM_CHANNELS % NUM_ROIS; - - const __global INPUT1_TYPE* roi_ptr = &src_rois[INPUT1_BATCH_PITCH * r]; - + + const __global INPUT1_TYPE* roi_ptr = &src_rois[INPUT1_GET_INDEX(r, 0, 0, 0)]; + // Get ROI`s corners - const INPUT1_TYPE x1 = *roi_ptr * (INPUT1_TYPE) SPATIAL_SCALE; - const INPUT1_TYPE y1 = roi_ptr[1] * (INPUT1_TYPE) SPATIAL_SCALE; - const INPUT1_TYPE x2 = roi_ptr[2] * (INPUT1_TYPE) SPATIAL_SCALE; - const INPUT1_TYPE y2 = roi_ptr[3] * (INPUT1_TYPE) SPATIAL_SCALE; - - const INPUT1_TYPE roi_width = MAX(x2 - x1, (INPUT1_TYPE) 1.0); - const INPUT1_TYPE roi_height = MAX(y2 - y1, (INPUT1_TYPE) 1.0); + const INPUT1_TYPE x1 = + (roi_ptr[0] + (INPUT1_TYPE)OFFSET_SRC) * (INPUT1_TYPE)SPATIAL_SCALE + (INPUT1_TYPE)OFFSET_DST; + const INPUT1_TYPE y1 = + (roi_ptr[1] + (INPUT1_TYPE)OFFSET_SRC) * (INPUT1_TYPE)SPATIAL_SCALE + (INPUT1_TYPE)OFFSET_DST; + const INPUT1_TYPE x2 = + (roi_ptr[2] + (INPUT1_TYPE)OFFSET_SRC) * (INPUT1_TYPE)SPATIAL_SCALE + (INPUT1_TYPE)OFFSET_DST; + const INPUT1_TYPE y2 = + (roi_ptr[3] + (INPUT1_TYPE)OFFSET_SRC) * (INPUT1_TYPE)SPATIAL_SCALE + (INPUT1_TYPE)OFFSET_DST; + + + const INPUT1_TYPE roi_width = MAX(x2 - x1, (INPUT1_TYPE)MIN_SIZE); + const INPUT1_TYPE roi_height = MAX(y2 - y1, (INPUT1_TYPE)MIN_SIZE); const INPUT1_TYPE bin_width = roi_width / POOLED_WIDTH; const INPUT1_TYPE bin_height = roi_height / POOLED_HEIGHT; - const int sampling_ratio_x = SAMPLING_RATIO == 0 ? (int) ceil(bin_width) : SAMPLING_RATIO; - const int sampling_ratio_y = SAMPLING_RATIO == 0 ? (int) ceil(bin_height) : SAMPLING_RATIO; + const int sampling_ratio_x = SAMPLING_RATIO == 0 ? (int)ceil(bin_width) : SAMPLING_RATIO; + const int sampling_ratio_y = SAMPLING_RATIO == 0 ? (int)ceil(bin_height) : SAMPLING_RATIO; + + const INPUT1_TYPE sample_distance_x = bin_width / (INPUT1_TYPE)sampling_ratio_x; + const INPUT1_TYPE sample_distance_y = bin_height / (INPUT1_TYPE)sampling_ratio_y; + + const __global INPUT0_TYPE* data = src_data + INPUT0_GET_INDEX(r, c, 0, 0); - const INPUT1_TYPE sample_distance_x = bin_width / (INPUT1_TYPE) sampling_ratio_x; - const INPUT1_TYPE sample_distance_y = bin_height / (INPUT1_TYPE) sampling_ratio_y; - - const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + r*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c; OUTPUT_TYPE pooled_value = 0; for (unsigned int y_sample_ind = 0; y_sample_ind < sampling_ratio_y; y_sample_ind++) { - INPUT1_TYPE sample_y = y1 + (INPUT1_TYPE) y * bin_height + - sample_distance_y * ((INPUT1_TYPE) y_sample_ind + (INPUT1_TYPE) 0.5f); + INPUT1_TYPE sample_y = + y1 + (INPUT1_TYPE)y * bin_height + sample_distance_y * ((INPUT1_TYPE)y_sample_ind + (INPUT1_TYPE)0.5f); for (unsigned int x_sample_ind = 0; x_sample_ind < sampling_ratio_x; x_sample_ind++) { - INPUT1_TYPE sample_x = x1 + (INPUT1_TYPE) x * bin_width + - sample_distance_x * ((INPUT1_TYPE) x_sample_ind + (INPUT1_TYPE) 0.5f); + INPUT1_TYPE sample_x = + x1 + (INPUT1_TYPE)x * bin_width + sample_distance_x * ((INPUT1_TYPE)x_sample_ind + (INPUT1_TYPE)0.5f); unsigned int sample_y_low = 0; unsigned int sample_x_low = 0; unsigned int sample_y_high = 0; unsigned int sample_x_high = 0; - INPUT1_TYPE weight_left = (INPUT1_TYPE) 0.f; - INPUT1_TYPE weight_right = (INPUT1_TYPE) 0.f; - INPUT1_TYPE weight_top = (INPUT1_TYPE) 0.f; - INPUT1_TYPE weight_bottom = (INPUT1_TYPE) 0.f; + INPUT1_TYPE weight_left = INPUT1_VAL_ZERO; + INPUT1_TYPE weight_right = INPUT1_VAL_ZERO; + INPUT1_TYPE weight_top = INPUT1_VAL_ZERO; + INPUT1_TYPE weight_bottom = INPUT1_VAL_ZERO; if (sample_x >= -1.0 || sample_x <= INPUT0_SIZE_X || sample_y >= -1.0 || sample_y <= INPUT0_SIZE_Y) { - sample_x = MAX(sample_x, (INPUT1_TYPE) 0.f); - sample_y = MAX(sample_y, (INPUT1_TYPE) 0.f); + sample_x = MAX(sample_x, INPUT1_VAL_ZERO); + sample_y = MAX(sample_y, INPUT1_VAL_ZERO); - sample_y_low = (unsigned int) sample_y; - sample_x_low = (unsigned int) sample_x; + sample_y_low = (unsigned int)sample_y; + sample_x_low = (unsigned int)sample_x; if (sample_y_low >= INPUT0_SIZE_Y - 1) { sample_y_high = sample_y_low = INPUT0_SIZE_Y - 1; - sample_y = (INPUT1_TYPE) sample_y_low; + sample_y = (INPUT1_TYPE)sample_y_low; } else { sample_y_high = sample_y_low + 1; } if (sample_x_low >= INPUT0_SIZE_X - 1) { sample_x_high = sample_x_low = INPUT0_SIZE_X - 1; - sample_x = (INPUT1_TYPE) sample_x_low; + sample_x = (INPUT1_TYPE)sample_x_low; } else { sample_x_high = sample_x_low + 1; } // weight calculation for bilinear interpolation - weight_top = sample_y - (INPUT1_TYPE) sample_y_low; - weight_left = sample_x - (INPUT1_TYPE) sample_x_low; - weight_bottom = (INPUT1_TYPE) 1.f - weight_top; - weight_right = (INPUT1_TYPE) 1.f - weight_left; + weight_top = sample_y - (INPUT1_TYPE)sample_y_low; + weight_left = sample_x - (INPUT1_TYPE)sample_x_low; + weight_bottom = INPUT1_VAL_ONE - weight_top; + weight_right = INPUT1_VAL_ONE - weight_left; } - const INPUT0_TYPE top_left = data[sample_y_low * INPUT0_Y_PITCH + sample_x_low * INPUT0_X_PITCH]; - const INPUT0_TYPE top_right = data[sample_y_low * INPUT0_Y_PITCH + sample_x_high * INPUT0_X_PITCH]; - const INPUT0_TYPE bottom_left = data[sample_y_high * INPUT0_Y_PITCH + sample_x_low * INPUT0_X_PITCH]; - const INPUT0_TYPE bottom_right = data[sample_y_high * INPUT0_Y_PITCH + sample_x_high * INPUT0_X_PITCH]; - - const INPUT0_TYPE interpolated_value = weight_bottom * weight_right * top_left + - weight_bottom * weight_left * top_right + - weight_top * weight_right * bottom_left + - weight_top * weight_left * bottom_right; + + const INPUT0_TYPE top_left = data[INPUT0_GET_INDEX(0, 0, sample_y_low, sample_x_low)]; + const INPUT0_TYPE top_right = data[INPUT0_GET_INDEX(0, 0, sample_y_low, sample_x_high)]; + const INPUT0_TYPE bottom_left = data[INPUT0_GET_INDEX(0, 0, sample_y_high, sample_x_low)]; + const INPUT0_TYPE bottom_right = data[INPUT0_GET_INDEX(0, 0, sample_y_high, sample_x_high)]; + + const INPUT0_TYPE interpolated_value = + weight_bottom * weight_right * top_left + weight_bottom * weight_left * top_right + + weight_top * weight_right * bottom_left + weight_top * weight_left * bottom_right; + #if MAX_POOL pooled_value = MAX(pooled_value, interpolated_value); #elif AVG_POOL @@ -108,7 +112,7 @@ KERNEL(roi_align_ref) #if AVG_POOL pooled_value /= sampling_ratio_x * sampling_ratio_x; #endif - const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_BATCH_PITCH; - dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)pooled_value, ACTIVATION_PARAMS); + + dst_data[OUTPUT_GET_INDEX(r, c, y, x)] = ACTIVATION((OUTPUT_TYPE)pooled_value, ACTIVATION_PARAMS); } diff --git a/src/plugins/intel_gpu/src/plugin/ops/roi_align.cpp b/src/plugins/intel_gpu/src/plugin/ops/roi_align.cpp index 416249fc949..567976e52c1 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/roi_align.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/roi_align.cpp @@ -1,29 +1,44 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "intel_gpu/plugin/program.hpp" -#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/roi_align.hpp" -#include "intel_gpu/primitives/roi_align.hpp" + #include +#include "intel_gpu/plugin/common_utils.hpp" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/primitives/roi_align.hpp" + namespace ov { namespace intel_gpu { namespace { -cldnn::roi_align::PoolingMode from(ngraph::op::v3::ROIAlign::PoolingMode mode) { +template +cldnn::roi_align::PoolingMode from(T mode) { switch (mode) { - case ngraph::op::v3::ROIAlign::PoolingMode::MAX: - return cldnn::roi_align::PoolingMode::Max; - case ngraph::op::v3::ROIAlign::PoolingMode::AVG: + case T::MAX: + return cldnn::roi_align::PoolingMode::max; + case T::AVG: default: - return cldnn::roi_align::PoolingMode::Avg; + return cldnn::roi_align::PoolingMode::avg; + } +} + +cldnn::roi_align::AlignedMode from(ngraph::op::v9::ROIAlign::AlignedMode mode) { + switch (mode) { + case ngraph::op::v9::ROIAlign::AlignedMode::HALF_PIXEL_FOR_NN: + return cldnn::roi_align::AlignedMode::half_pixel_for_nn; + case ngraph::op::v9::ROIAlign::AlignedMode::HALF_PIXEL: + return cldnn::roi_align::AlignedMode::half_pixel; + case ngraph::op::v9::ROIAlign::AlignedMode::ASYMMETRIC: + default: + return cldnn::roi_align::AlignedMode::asymmetric; } } void CreateROIAlignOp(Program& p, const std::shared_ptr& op) { - p.ValidateInputs(op, { 3 }); + p.ValidateInputs(op, {3}); auto roi_align_prim = cldnn::roi_align(layer_type_name_ID(op), p.GetInputPrimitiveIDs(op), op->get_pooled_h(), @@ -31,14 +46,31 @@ void CreateROIAlignOp(Program& p, const std::shared_ptrget_sampling_ratio(), op->get_spatial_scale(), from(op->get_mode()), + cldnn::roi_align::AlignedMode::asymmetric, op->get_friendly_name()); p.AddPrimitive(roi_align_prim); p.AddPrimitiveToProfiler(op); } -} // anonymous namespace +void CreateROIAlignOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {3}); + auto roi_align_prim = cldnn::roi_align(layer_type_name_ID(op), + p.GetInputPrimitiveIDs(op), + op->get_pooled_h(), + op->get_pooled_w(), + op->get_sampling_ratio(), + op->get_spatial_scale(), + from(op->get_mode()), + from(op->get_aligned_mode()), + op->get_friendly_name()); + p.AddPrimitive(roi_align_prim); + p.AddPrimitiveToProfiler(op); +} + +} // anonymous namespace REGISTER_FACTORY_IMPL(v3, ROIAlign); +REGISTER_FACTORY_IMPL(v9, ROIAlign); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp new file mode 100644 index 00000000000..ebebe27c124 --- /dev/null +++ b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp @@ -0,0 +1,178 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +#include "test_utils.h" + +using namespace cldnn; +using namespace ::tests; + +template +struct TypesWithFormat { + using DataType = TD; + using IndexType = TI; + static const cldnn::format::type format = fmt; +}; + +template +struct roi_align_test : public testing::Test { + using TD = typename Types::DataType; + using TI = typename Types::IndexType; + const data_types device_data_type = type_to_data_type::value; + const data_types device_ind_type = type_to_data_type::value; + const cldnn::format::type blocked_format = Types::format; + const cldnn::format::type plain_format = format::bfyx; + + const int pooled_h{2}; + const int pooled_w{2}; + const int sampling_ratio{2}; + const float spatial_scale{1}; + + const std::vector input_data = { + TD(0.f), TD(1.f), TD(8.f), TD(5.f), TD(5.f), TD(2.f), TD(0.f), TD(7.f), TD(7.f), TD(10.f), TD(4.f), + TD(5.f), TD(9.f), TD(0.f), TD(0.f), TD(5.f), TD(7.f), TD(0.f), TD(4.f), TD(0.f), TD(4.f), TD(7.f), + TD(6.f), TD(10.f), TD(9.f), TD(5.f), TD(1.f), TD(7.f), TD(4.f), TD(7.f), TD(10.f), TD(8.f), TD(2.f), + TD(0.f), TD(8.f), TD(3.f), TD(6.f), TD(8.f), TD(10.f), TD(4.f), TD(2.f), TD(10.f), TD(7.f), TD(8.f), + TD(7.f), TD(0.f), TD(6.f), TD(9.f), TD(2.f), TD(4.f), TD(8.f), TD(5.f), TD(2.f), TD(3.f), TD(3.f), + TD(1.f), TD(5.f), TD(9.f), TD(10.f), TD(0.f), TD(9.f), TD(5.f), TD(5.f), TD(3.f), TD(10.f), TD(5.f), + TD(2.f), TD(0.f), TD(10.f), TD(0.f), TD(5.f), TD(4.f), TD(3.f), TD(10.f), TD(5.f), TD(5.f), TD(10.f), + TD(0.f), TD(8.f), TD(8.f), TD(9.f), TD(1.f), TD(0.f), TD(7.f), TD(9.f), TD(6.f), TD(8.f), TD(7.f), + TD(10.f), TD(9.f), TD(2.f), TD(3.f), TD(3.f), TD(5.f), TD(6.f), TD(9.f), TD(4.f), TD(9.f), TD(2.f), + TD(4.f), TD(5.f), TD(5.f), TD(3.f), TD(1.f), TD(1.f), TD(6.f), TD(8.f), TD(0.f), TD(5.f), TD(5.f), + TD(10.f), TD(8.f), TD(6.f), TD(9.f), TD(6.f), TD(9.f), TD(1.f), TD(2.f), TD(7.f), TD(1.f), TD(1.f), + TD(3.f), TD(0.f), TD(4.f), TD(0.f), TD(7.f), TD(10.f), TD(2.f)}; + const std::vector coords_data = {TD(2.f), TD(2.f), TD(4.f), TD(4.f), TD(2.f), TD(2.f), TD(4.f), TD(4.f)}; + const std::vector roi_data = {0, 1}; + + const layout input_lt = layout(device_data_type, plain_format, {2, 1, 8, 8}); + const layout coords_lt = layout(device_data_type, plain_format, {2, 4, 1, 1}); + const layout roi_lt = layout(device_ind_type, plain_format, {2, 1, 1, 1}); + + memory::ptr get_memory(engine& engine, const layout& lt, const std::vector& data) const { + auto mem = engine.allocate_memory(lt); + tests::set_values(mem, data); + return mem; + } + + memory::ptr get_roi_memory(engine& engine) const { + auto mem = engine.allocate_memory(roi_lt); + tests::set_values(mem, roi_data); + return mem; + } + + void execute(const std::vector& expected_output, + roi_align::PoolingMode pooling_mode, + roi_align::AlignedMode aligned_mode) const { + auto& engine = get_test_engine(); + + auto input = get_memory(engine, input_lt, input_data); + auto coords = get_memory(engine, coords_lt, coords_data); + auto roi_ind = get_roi_memory(engine); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("coords", coords->get_layout())); + topology.add(input_layout("roi_ind", roi_ind->get_layout())); + topology.add(reorder("reorder_input", "input", blocked_format, device_data_type)); + topology.add(reorder("reorder_coords", "coords", blocked_format, device_data_type)); + topology.add(reorder("reorder_ind", "roi_ind", blocked_format, device_ind_type)); + topology.add(roi_align("roi_align", + {"reorder_input", "reorder_coords", "reorder_ind"}, + pooled_h, + pooled_w, + sampling_ratio, + spatial_scale, + pooling_mode, + aligned_mode)); + topology.add(reorder("out", "roi_align", plain_format, device_data_type)); + + network network(engine, topology); + network.set_input_data("input", input); + network.set_input_data("coords", coords); + network.set_input_data("roi_ind", roi_ind); + + auto outputs = network.execute(); + + auto output = outputs.at("out").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + ASSERT_EQ(output_ptr.size(), expected_output.size()); + for (uint32_t i = 0; i < expected_output.size(); ++i) { + EXPECT_NEAR(output_ptr[i], expected_output[i], 0.01); + } + } +}; + +// it's a bit overloaded with the cartesian product of types and formats, but that's the lesser evil +// since we have specific type for expected values that are tied to specific input modes +// so that Combine approach could avoid manual combinations but it would be much more complicated +using roi_align_test_types = testing::Types, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat, + TypesWithFormat>; + +TYPED_TEST_SUITE(roi_align_test, roi_align_test_types); + +TYPED_TEST(roi_align_test, avg_asymmetric) { + using TD = typename TypeParam::DataType; + const std::vector + expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)}; + this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric); +} + +TYPED_TEST(roi_align_test, avg_half_pixel_for_nn) { + using TD = typename TypeParam::DataType; + const std::vector expected_output = + {TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)}; + this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn); +} + +TYPED_TEST(roi_align_test, max_half_pixel) { + using TD = typename TypeParam::DataType; + const std::vector expected_output = + {TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)}; + this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel); +} diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/roi_align.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/roi_align.cpp index ca39592bde6..217614d5c92 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/roi_align.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/roi_align.cpp @@ -1,16 +1,16 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "single_layer_tests/roi_align.hpp" + #include -#include "single_layer_tests/roi_align.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; - const std::vector netPRCs = { - InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP32 // There is no possibility to test ROIAlign in fp16 precision, // because on edge cases where in fp32 version ROI value is // a little bit smaller than the nearest integer value, @@ -19,44 +19,88 @@ const std::vector netPRCs = { // in fp32 and fp16 precisions. // In real AI applications this problem is solved by precision-aware training. - // InferenceEngine::Precision::FP16, + // InferenceEngine::Precision::FP16 }; -const auto ROIAlignCases_average = - ::testing::Combine( - ::testing::ValuesIn( - std::vector> { - { 3, 8, 16, 16 }, - { 2, 1, 16, 16 }, - { 2, 1, 8, 16 }}), - ::testing::Values(std::vector{ 2, 4 }), - ::testing::Values(2), - ::testing::Values(2), - ::testing::ValuesIn(std::vector { 1, 0.625 }), - ::testing::Values(2), - ::testing::Values("avg"), - ::testing::ValuesIn(netPRCs), - ::testing::Values(CommonTestUtils::DEVICE_GPU) -); +const auto ROIAlignCases_average = ::testing::Combine( + ::testing::ValuesIn(std::vector>{{3, 8, 16, 16}, {2, 1, 16, 16}, {2, 1, 8, 16}}), + ::testing::Values(std::vector{2, 4}), + ::testing::Values(2), + ::testing::Values(2), + ::testing::ValuesIn(std::vector{1, 0.625}), + ::testing::Values(2), + ::testing::Values("avg"), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); -INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_average, ROIAlignLayerTest, ROIAlignCases_average, ROIAlignLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_average, + ROIAlignLayerTest, + ROIAlignCases_average, + ROIAlignLayerTest::getTestCaseName); -const auto ROIAlignCases_max = - ::testing::Combine( - ::testing::ValuesIn( - std::vector> { - { 2, 8, 20, 20 }, - { 2, 1, 20, 20 }, - { 2, 1, 10, 20 } - }), - ::testing::Values(std::vector{ 2, 4 }), - ::testing::Values(2), - ::testing::Values(2), - ::testing::ValuesIn(std::vector { 1, 0.625 }), - ::testing::Values(2), - ::testing::Values("max"), - ::testing::ValuesIn(netPRCs), - ::testing::Values(CommonTestUtils::DEVICE_GPU) -); +const auto ROIAlignCases_max = ::testing::Combine( + ::testing::ValuesIn(std::vector>{{2, 8, 20, 20}, {2, 1, 20, 20}, {2, 1, 10, 20}}), + ::testing::Values(std::vector{2, 4}), + ::testing::Values(2), + ::testing::Values(2), + ::testing::ValuesIn(std::vector{1, 0.625}), + ::testing::Values(2), + ::testing::Values("max"), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); -INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max, ROIAlignLayerTest, ROIAlignCases_max, ROIAlignLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max, + ROIAlignLayerTest, + ROIAlignCases_max, + ROIAlignLayerTest::getTestCaseName); + +const auto ROIAlignCases_average_asymmetric = ::testing::Combine( + ::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, {2, 8, 20, 20}, {2, 1, 20, 20}, {2, 1, 10, 20}}), + ::testing::Values(std::vector{2, 4}), + ::testing::Values(2), + ::testing::Values(2), + ::testing::ValuesIn(std::vector{1, 0.625}), + ::testing::Values(2), + ::testing::Values("avg"), + ::testing::Values("asymmetric"), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_asym, + ROIAlignV9LayerTest, + ROIAlignCases_average_asymmetric, + ROIAlignV9LayerTest::getTestCaseName); + +const auto ROIAlignCases_average_half_pixel_for_nn = ::testing::Combine( + ::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, {2, 8, 20, 20}, {2, 1, 20, 20}, {2, 1, 10, 20}}), + ::testing::Values(std::vector{2, 4}), + ::testing::Values(2), + ::testing::Values(2), + ::testing::ValuesIn(std::vector{1, 0.625}), + ::testing::Values(2), + ::testing::Values("avg"), + ::testing::Values("half_pixel_for_nn"), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_hpfn, + ROIAlignV9LayerTest, + ROIAlignCases_average_half_pixel_for_nn, + ROIAlignV9LayerTest::getTestCaseName); + +const auto ROIAlignCases_maximum_half_pixel = ::testing::Combine( + ::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, {2, 8, 20, 20}, {2, 1, 20, 20}, {2, 1, 10, 20}}), + ::testing::Values(std::vector{2, 4}), + ::testing::Values(2), + ::testing::Values(2), + ::testing::ValuesIn(std::vector{1, 0.625}), + ::testing::Values(2), + ::testing::Values("max"), + ::testing::Values("half_pixel"), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max_hp, + ROIAlignV9LayerTest, + ROIAlignCases_maximum_half_pixel, + ROIAlignV9LayerTest::getTestCaseName); diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/roi_align.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/roi_align.hpp index bf9cfeb92d7..a8eecb0ca98 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/roi_align.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/roi_align.hpp @@ -12,4 +12,8 @@ TEST_P(ROIAlignLayerTest, CompareWithRefs) { Run(); } +TEST_P(ROIAlignV9LayerTest, CompareWithRefs) { + Run(); +} + } // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roi_align.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roi_align.hpp index 5d61726c001..6c1f76d8f63 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roi_align.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roi_align.hpp @@ -37,4 +37,32 @@ private: std::string poolingMode; }; +using roialignV9Params = std::tuple, // feature map shape + std::vector, // proposal coords shape + int, // bin's row count + int, // bin's column count + float, // spatial scale + int, // pooling ratio + std::string, // pooling mode + std::string, // roi aligned mode + InferenceEngine::Precision, // net precision + LayerTestsUtils::TargetDevice>; // device name + +class ROIAlignV9LayerTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; + +private: + int pooledH; + int pooledW; + float spatialScale; + int poolingRatio; + std::string poolingMode; + std::string roiAlignedMode; +}; + } // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp b/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp index df0b3dd6340..0acb6e4c29c 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp @@ -2,10 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "shared_test_classes/single_layer/roi_align.hpp" + #include +#include #include "ngraph_functions/builders.hpp" -#include "shared_test_classes/single_layer/roi_align.hpp" +#include "openvino/core/enum_names.hpp" using namespace InferenceEngine; using namespace FuncTestUtils::PrecisionUtils; @@ -77,36 +80,131 @@ void ROIAlignLayerTest::SetUp() { std::vector inputShape; std::vector coordsShape; InferenceEngine::Precision netPrecision; - std::tie(inputShape, coordsShape, pooledH, pooledW, - spatialScale, poolingRatio, poolingMode, netPrecision, targetDevice) = this->GetParam(); + std::tie(inputShape, + coordsShape, + pooledH, + pooledW, + spatialScale, + poolingRatio, + poolingMode, + netPrecision, + targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector proposalVector; std::vector roiIdxVector; proposalVector.resize(coordsShape[0] * 4); roiIdxVector.resize(coordsShape[0]); - fillCoordTensor(proposalVector, inputShape[2], inputShape[3], - spatialScale, poolingRatio, pooledH, pooledW); + fillCoordTensor(proposalVector, inputShape[2], inputShape[3], spatialScale, poolingRatio, pooledH, pooledW); fillIdxTensor(roiIdxVector, inputShape[0]); - ngraph::Shape idxShape = { coordsShape[0] }; + ngraph::Shape idxShape = {coordsShape[0]}; auto coords = std::make_shared(ngPrc, coordsShape, proposalVector.data()); auto roisIdx = std::make_shared(ngraph::element::i32, idxShape, roiIdxVector.data()); - std::shared_ptr roiAlign = - std::make_shared(paramOuts[0], - coords, - roisIdx, - pooledH, - pooledW, - poolingRatio, - spatialScale, - poolingMode); + std::shared_ptr roiAlign = std::make_shared(paramOuts[0], + coords, + roisIdx, + pooledH, + pooledW, + poolingRatio, + spatialScale, + poolingMode); ngraph::ResultVector results{std::make_shared(roiAlign)}; function = std::make_shared(results, params, "roi_align"); } + +std::string ROIAlignV9LayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector inputShape; + std::vector coordsShape; + + int pooledH; + int pooledW; + float spatialScale; + int poolingRatio; + std::string poolingMode; + std::string roiAlignedMode; + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::tie(inputShape, + coordsShape, + pooledH, + pooledW, + spatialScale, + poolingRatio, + poolingMode, + roiAlignedMode, + netPrecision, + targetDevice) = obj.param; + + std::ostringstream result; + + result << "in_shape=" << CommonTestUtils::vec2str(inputShape) << "_"; + result << "coord_shape=" << CommonTestUtils::vec2str(coordsShape) << "_"; + result << "pooled_h=" << pooledH << "_"; + result << "pooled_w=" << pooledW << "_"; + result << "spatial_scale=" << spatialScale << "_"; + result << "pooling_ratio=" << poolingRatio << "_"; + result << "mode=" << poolingMode << "_"; + result << "mode=" << roiAlignedMode << "_"; + result << "prec=" << netPrecision.name() << "_"; + result << "dev=" << targetDevice; + return result.str(); +} + +void ROIAlignV9LayerTest::SetUp() { + std::vector inputShape; + std::vector coordsShape; + InferenceEngine::Precision netPrecision; + std::tie(inputShape, + coordsShape, + pooledH, + pooledW, + spatialScale, + poolingRatio, + poolingMode, + roiAlignedMode, + netPrecision, + targetDevice) = this->GetParam(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + std::vector proposalVector; + std::vector roiIdxVector; + proposalVector.resize(coordsShape[0] * 4); + roiIdxVector.resize(coordsShape[0]); + + ROIAlignLayerTest::fillCoordTensor(proposalVector, + inputShape[2], + inputShape[3], + spatialScale, + poolingRatio, + pooledH, + pooledW); + ROIAlignLayerTest::fillIdxTensor(roiIdxVector, inputShape[0]); + ngraph::Shape idxShape = {coordsShape[0]}; + + auto coords = std::make_shared(ngPrc, coordsShape, proposalVector.data()); + auto roisIdx = std::make_shared(ngraph::element::i32, idxShape, roiIdxVector.data()); + + std::shared_ptr roiAlign = std::make_shared( + paramOuts[0], + coords, + roisIdx, + pooledH, + pooledW, + poolingRatio, + spatialScale, + ov::EnumNames::as_enum(poolingMode), + ov::EnumNames::as_enum(roiAlignedMode)); + + ngraph::ResultVector results{std::make_shared(roiAlign)}; + function = std::make_shared(results, params, "roi_align"); +} } // namespace LayerTestsDefinitions