diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/max_unpooling.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/max_unpooling.hpp deleted file mode 100644 index 6d12a181660..00000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/max_unpooling.hpp +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "primitive.hpp" -#include - -namespace cldnn { -/// @addtogroup cpp_api C++ API -/// @{ -/// @addtogroup cpp_topology Network Topology -/// @{ -/// @addtogroup cpp_primitives Primitives -/// @{ - -/// @brief Performs "max_unpooling" operation. -/// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored. -struct max_unpooling : public primitive_base { - CLDNN_DECLARE_PRIMITIVE(max_unpooling) - - /// @brief Constructs max_unpooling primitive. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param argmax Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - /// @param stride Defines shift in input buffer between adjacent calculations of output values. - /// Used only for output size computation. - /// @param size Pooling kernel size. Used only for output size computation. - /// @param pad Defines logical pad value added to input tensor. Used only for output size computation. - max_unpooling(const primitive_id& id, - const primitive_id& input, - const primitive_id& argmax, - const tensor& size, - const tensor& stride, - const tensor& pad = {0, 0, 0, 0}, - const padding& output_padding = padding()) - : primitive_base(id, {input}, output_padding), - argmax(argmax), - pad(pad), - stride(stride), - size(size), - with_output_size(false) {} - - /// @brief Constructs max_unpooling primitive (with provided output size) - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param argmax Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - /// @param output_size User-defined output data size of the primitive (w/o padding). - max_unpooling(const primitive_id& id, - const primitive_id& input, - const primitive_id& argmax, - tensor output_size, - const padding& output_padding = padding()) - : primitive_base(id, {input}, output_padding), - argmax(argmax), - with_output_size(true), - output_size(output_size) {} - - /// @brief Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - primitive_id argmax; - /// @brief Defines logical pad value added to input tensor. - tensor pad; - /// @brief Defines shift in input buffer between adjacent calculations of output values. Used only for output size computation. - tensor stride; - /// @brief Pooling kernel size. Used only for output size computation. - tensor size; - /// @brief Indicates that the primitive has user-defined output size (non-zero value). Used only for output size computation. - bool with_output_size; - /// @brief User-defined output data size of the primitive (w/o padding). - tensor output_size; - -protected: - std::vector> get_dependencies() const override { return {argmax}; } -}; -/// @} -/// @} -/// @} -} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp index 3dc3d4698f2..eae176a42e4 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/pooling.hpp @@ -26,8 +26,6 @@ enum class pooling_mode : int32_t { average, /// @brief Average-pooling method without values which are outside of the input. average_no_padding, - /// @brief Maximum-pooling method with additional buffer to store argmax indices. - max_with_argmax, /// @brief Pooling with bilinear interpolation. bilinear, /// @brief Deformable pooling with bilinear interpolation. @@ -51,46 +49,18 @@ struct pooling : public primitive_base { pooling_mode mode, const ov::Shape& size, const ov::Strides& stride, - const ov::Shape& pad = {0, 0}, + const ov::Shape& pads_begin = {0, 0}, + const ov::Shape& pads_end = {0, 0}, const padding& output_padding = padding()) : primitive_base(id, {input}, output_padding), - argmax(""), mode(static_cast(mode)), - global_pooling(false), - pad(pad), + pads_begin(pads_begin), + pads_end(pads_end), stride(stride), size(size), - with_output_size(false), - pad_end(size.size(), 0) {} + with_output_size(false) {} - /// @brief Constructs pooling primitive with argmax. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param argmax Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - /// @param mode Pooling mode. - /// @param stride Defines shift in input buffer between adjacent calculations of output values. - /// @param size Pooling kernel size. - /// @param pad Defines logical pad value added to input tensor - pooling(const primitive_id& id, - const primitive_id& input, - const primitive_id& argmax, - pooling_mode mode, - const ov::Shape& size, - const ov::Strides& stride, - const ov::Shape& pad = {0, 0}, - const padding& output_padding = padding()) - : primitive_base(id, {input}, output_padding), - argmax(argmax), - mode(static_cast(mode)), - global_pooling(false), - pad(pad), - stride(stride), - size(size), - with_output_size(false), - pad_end(size.size(), 0) {} - - /// @brief Constructs pooling primitive (computes input paddings to match output size). + /// @brief Constructs pooling primitive with known output shape. /// @param id This primitive id. /// @param input Input primitive id. /// @param mode Pooling mode. @@ -103,68 +73,19 @@ struct pooling : public primitive_base { pooling_mode mode, const ov::Shape& size, const ov::Strides& stride, - const ov::Shape& pad, + const ov::Shape& pads_begin, + const ov::Shape& pads_end, tensor output_size, const data_types output_data_type, const padding& output_padding = padding()) : primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}), - argmax(""), mode(static_cast(mode)), - global_pooling(false), - pad(pad), + pads_begin(pads_begin), + pads_end(pads_end), stride(stride), size(size), with_output_size(true), - output_size(output_size), - pad_end(size.size(), 0) {} - - /// @brief Constructs pooling primitive with argmax (computes input paddings to match output size). - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param argmax Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - /// @param mode Pooling mode. - /// @param stride Defines shift in input buffer between adjacent calculations of output values. - /// @param size Pooling kernel size. - /// @param pad Defines logical pad value added to input tensor. - /// @param output_size User-defined output data size of the primitive (w/o padding). - pooling(const primitive_id& id, - const primitive_id& input, - const primitive_id& argmax, - pooling_mode mode, - const ov::Shape& size, - const ov::Strides& stride, - const ov::Shape& pad, - tensor output_size, - const padding& output_padding = padding()) - : primitive_base(id, {input}, output_padding), - argmax(argmax), - mode(static_cast(mode)), - global_pooling(false), - pad(pad), - stride(stride), - size(size), - with_output_size(true), - output_size(output_size), - pad_end(size.size(), 0) {} - - /// @brief Constructs pooling primitive with kernel size equal to the spatial dimension of input tensor. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param mode Pooling mode. - pooling(const primitive_id& id, - const primitive_id& input, - pooling_mode mode, - const padding& output_padding = padding()) - : primitive_base(id, {input}, output_padding), - argmax(""), - mode(static_cast(mode)), - global_pooling(true), - pad({0, 0}), - stride({1, 1}), - size({0, 0}), - with_output_size(false), - pad_end(size.size(), 0) {} + output_size(output_size) {} /// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output). /// @param id This primitive id. @@ -184,41 +105,35 @@ struct pooling : public primitive_base { const ov::Shape& size, const ov::Strides& stride, const ov::Strides& dilation, - const ov::Shape& pad, - const ov::Shape& pad_end, + const ov::Shape& pads_begin, + const ov::Shape& pads_end, int64_t axis, data_types index_element_type, tensor output_size, const data_types output_data_type, const padding& output_padding = padding()) : primitive_base(id, {input, indices_output}, output_padding, optional_data_type{output_data_type}), - argmax(""), indices_output(indices_output), mode(pooling_mode::max), - global_pooling(false), - pad(pad), + pads_begin(pads_begin), + pads_end(pads_end), stride(stride), dilation(dilation), size(size), with_output_size(true), output_size(output_size), - pad_end(pad_end), axis(axis), index_element_type(index_element_type), - maxPoolOpset8Features(true) - {} + maxPoolOpset8Features(true) {} - /// @brief Primitive id which contains indices of each max pooling region. - /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type. - primitive_id argmax; /// @brief Primitive id which contains indices output. primitive_id indices_output; /// @brief Pooling mode. pooling_mode mode; - /// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor) - bool global_pooling; /// @brief Defines logical pad value added to input tensor. - ov::Shape pad; + ov::Shape pads_begin; + /// @brief Defines a shift, relative to the end of padding shape. + ov::Shape pads_end; /// @brief Defines shift in input buffer between adjacent calculations of output values. ov::Strides stride; /// @brief Defines index of next pixel to select when pooling @@ -229,8 +144,6 @@ struct pooling : public primitive_base { bool with_output_size; /// @brief User-defined output data size of the primitive (w/o padding). tensor output_size; - /// @brief Defines a shift, relative to the end of padding shape. - ov::Shape pad_end; /// @brief first dimension of input that should be used to calculate the upper bound of index output int64_t axis = 0; /// @brief type of index output @@ -240,8 +153,6 @@ struct pooling : public primitive_base { protected: std::vector> get_dependencies() const override { std::vector> ret; - if (!argmax.empty()) - ret.push_back(argmax); if (!indices_output.empty()) ret.push_back(indices_output); return ret; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp index 272aa39728b..ea4e943422b 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp @@ -111,7 +111,7 @@ void prepare_padding::run(program& p) { needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(), prim->output_size, size, - ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()), + ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()), prim->stride, ov::Strides(prim->size.size(), 1), false, diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 4f9fb8eb2bb..b2d5d8206fe 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -649,11 +649,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { return data_type_traits::is_i8_u8(in_dt); }; - auto pooling_supports_fusings = [](pooling_node& node) -> bool { - auto pooling_mode = node.get_primitive()->mode; - return pooling_mode != cldnn::pooling_mode::max_with_argmax; - }; - auto dts_supports_fusings = [](depth_to_space_node& node) -> bool { bool input_conv = node.get_dependency(0).is_type(); bool out_eltw = node.get_users().front()->is_type(); @@ -772,7 +767,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { should_fuse |= input_data.is_type() && gemm_supports_fusings(input_data.as()); - should_fuse |= input_data.is_type() && pooling_supports_fusings(input_data.as()); + should_fuse |= input_data.is_type(); should_fuse |= input_data.is_type(); @@ -861,8 +856,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { _lo.get_optimization_attributes().use_onednn_impls || (in_dt_is_i8_u8 && out_dt_is_i8_u8)); - should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt() && - pooling_supports_fusings(input_data.as()); + should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); @@ -960,7 +954,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { (parents[i]->is_type()) || (parents[i]->is_type()) || (parents[i]->is_type()) || - (parents[i]->is_type() && pooling_supports_fusings(parents[i]->as())) || + (parents[i]->is_type()) || (parents[i]->is_type() && dts_supports_fusings(parents[i]->as())) || (parents[i]->is_type()) || (parents[i]->is_type() && reduce_supports_fusings(parents[i]->as())) || diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/trim_to_outputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/trim_to_outputs.cpp index f8af5a2badc..8d4e1fe0862 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/trim_to_outputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/trim_to_outputs.cpp @@ -8,7 +8,6 @@ // ToDo: remove those include with the appropriate code below once we will have support for multiple outputs of a // primitive -#include "max_unpooling_inst.h" #include "pooling_inst.h" #include #include @@ -27,13 +26,10 @@ void trim_to_outputs::run(program& p) { queue.push(&p.get_outputs()); std::vector special_nodes; - for (auto& node : p.get_processing_order()) { - if (node->is_type() || // input layout may become disconnected during prior boxes calculations so - // it may have not been marked at this place but we don't want to remove it - node->is_type() || // ToDo: remove this after support for multi-outputs in primitives will - // be implemented. - (node->is_type() && node->as().get_primitive()->mode == pooling_mode::max_with_argmax)) - special_nodes.push_back(node); + for (auto& node : p.get_processing_order()) { // input layout may become disconnected during prior boxes calculations so + if (node->is_type()) { // it may have not been marked at this place but we don't want to remove it + special_nodes.push_back(node); // ToDo: remove this after support for multi-outputs in primitives will + } // be implemented. } queue.push(&special_nodes); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp deleted file mode 100644 index a3ec27b83bb..00000000000 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/max_unpooling.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "max_unpooling_inst.h" -#include "primitive_base.hpp" -#include "impls/implementation_map.hpp" -#include "intel_gpu/runtime/error_handler.hpp" -#include "intel_gpu/graph/network.hpp" -#include "kernel_selector_helper.h" -#include "max_unpooling/max_unpooling_kernel_selector.h" -#include "max_unpooling/max_unpooling_kernel_base.h" -#include - -namespace cldnn { -namespace ocl { - -struct max_unpooling_impl : typed_primitive_impl_ocl { - using parent = typed_primitive_impl_ocl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - args.inputs.push_back(instance.dep_memory_ptr(1)); - return args; - } - -public: - event::ptr execute_impl(const std::vector& events, max_unpooling_inst& instance) override { - // clear output buffer - std::vector tmp_events(events); - auto& stream = instance.get_network().get_stream(); - auto ev = instance.output_memory().fill(stream); - tmp_events.push_back(ev); - return parent::execute_impl(tmp_events, instance); - } - - static primitive_impl* create(const max_unpooling_node& arg, const kernel_impl_params& impl_param) { - auto max_unpooling_params = get_default_params(impl_param); - auto max_unpooling_optional_params = - get_default_optional_params(arg.get_program()); - - const auto max_idx = 1; - max_unpooling_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[max_idx])); - - auto& kernel_selector = kernel_selector::max_unpooling_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(max_unpooling_params, max_unpooling_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]); - - return max_unpool; - } -}; - -namespace detail { - -attach_max_unpooling_impl::attach_max_unpooling_impl() { - implementation_map::add(impl_types::ocl, max_unpooling_impl::create, { - std::make_tuple(data_types::f32, format::yxfb), - std::make_tuple(data_types::f16, format::yxfb), - std::make_tuple(data_types::f32, format::bfyx), - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::i8, format::yxfb), - std::make_tuple(data_types::f32, format::byxf), - std::make_tuple(data_types::f16, format::byxf), - std::make_tuple(data_types::i8, format::byxf), - }); -} - -} // namespace detail -} // namespace ocl -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp index e4d04af4f4e..32541acfb2d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/pooling.cpp @@ -21,11 +21,9 @@ void validate_args(const pooling_node& arg) { auto stride_rank = arg.get_primitive()->stride.size(); auto window_rank = arg.get_primitive()->size.size(); - if (!arg.get_primitive()->global_pooling) { - CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_rank, "output dimensions", output_rank, ""); - CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_rank, "output dimensions", output_rank, ""); - CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_rank, "output dimensions", output_rank, ""); - } + CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_rank, "output dimensions", output_rank, ""); + CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_rank, "output dimensions", output_rank, ""); + CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_rank, "output dimensions", output_rank, ""); } kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) { @@ -36,8 +34,6 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) { return kernel_selector::pool_type::AVG; case pooling_mode::average_no_padding: return kernel_selector::pool_type::AVG; - case pooling_mode::max_with_argmax: - return kernel_selector::pool_type::MAX_WITH_ARGMAX; default: assert(0); return kernel_selector::pool_type::MAX; @@ -47,8 +43,6 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) { kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode) { switch (mode) { case pooling_mode::max: - case pooling_mode::max_with_argmax: - return kernel_selector::kernel_divider_mode::DONT_CARE; case pooling_mode::average: return kernel_selector::kernel_divider_mode::FIXED; case pooling_mode::average_no_padding: @@ -71,8 +65,6 @@ struct pooling_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { kernel_arguments_data args = parent::get_arguments(instance, split); - if (!instance.argument->argmax.empty()) - args.inputs.push_back(instance.dep_memory_ptr(1)); return args; } @@ -102,7 +94,9 @@ public: } const auto& stride = primitive->stride; - const auto& pad = primitive->pad; + const auto& pads_begin = primitive->pads_begin; + const auto& pads_end = primitive->pads_end; + const auto& dilation = primitive->dilation; auto kernel = primitive->size; const auto& input_layout = impl_param.input_layouts[0]; @@ -114,19 +108,12 @@ public: pp.poolType = cldnn_2_pool_type(primitive->mode); pp.remainderAction = kernel_selector::pool_remainder::CEIL; - if (primitive->global_pooling) { - kernel = ov::Shape(spatial_rank, 1); - for (size_t i = 0; i < spatial_rank; i++) { - kernel[i] = input_layout.spatial(spatial_rank - i - 1); - } - } - // check if last pooling window goes outside of input size + padding. If so the avg pooling size will be // adjusted to that, to work properly this calculation must take pad_end into account. auto dynamic_mode = false; for (size_t i = 0; i < spatial_rank; i++) { dynamic_mode |= (((output_layout.spatial(i) - 1) * stride[spatial_rank - i - 1]) + primitive->size[spatial_rank - i - 1]) > - (primitive->pad_end[spatial_rank - i - 1] + pad[spatial_rank - i - 1]) + input_layout.spatial(i); + (pads_end[spatial_rank - i - 1] + pads_begin[spatial_rank - i - 1]) + input_layout.spatial(i); } if (primitive->mode == pooling_mode::average && dynamic_mode) @@ -134,17 +121,14 @@ public: else pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode); - if (primitive->mode == pooling_mode::max_with_argmax) - pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout())); - uint32_t kernel_z = kernel.size() >= 3 ? kernel[kernel.size() - 3] : 1; uint32_t kernel_y = kernel.size() >= 2 ? kernel[kernel.size() - 2] : 1; uint32_t kernel_x = kernel.size() >= 1 ? kernel[kernel.size() - 1] : 1; pp.poolSize = {kernel_x, kernel_y, kernel_z}; - uint32_t pad_z = std::max(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); + uint32_t pad_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); + uint32_t pad_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); + uint32_t pad_x = std::max(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0); pp.poolPad = {pad_x, pad_y, pad_z}; uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 7061b8e284d..38b34aeacbc 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -46,7 +46,6 @@ void register_implementations() { REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); - REGISTER_OCL(max_unpooling); REGISTER_OCL(mutable_data); REGISTER_OCL(mvn); REGISTER_OCL(non_max_suppression); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index 98f5cab6380..12f4c220796 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -43,7 +43,6 @@ #include "intel_gpu/primitives/lstm_dynamic.hpp" #include "intel_gpu/primitives/lstm_dynamic_input.hpp" #include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp" -#include "intel_gpu/primitives/max_unpooling.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/primitives/mvn.hpp" #include "intel_gpu/primitives/non_max_suppression.hpp" @@ -127,7 +126,6 @@ REGISTER_OCL(grid_sample); REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); -REGISTER_OCL(max_unpooling); REGISTER_OCL(mutable_data); REGISTER_OCL(mvn); REGISTER_OCL(non_max_suppression); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp index 3488908780a..9bd38957e44 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.cpp @@ -32,17 +32,12 @@ protected: dnnl::memory::dims stride(prim->stride.begin(), prim->stride.end()); dnnl::memory::dims kernel(prim->size.begin(), prim->size.end()); - dnnl::memory::dims pad_l(prim->pad.begin(), prim->pad.end()); - dnnl::memory::dims pad_r(prim->pad_end.begin(), prim->pad_end.end()); + dnnl::memory::dims pad_l(prim->pads_begin.begin(), prim->pads_begin.end()); + dnnl::memory::dims pad_r(prim->pads_end.begin(), prim->pads_end.end()); auto input_md = onednn::layout_to_memory_desc(input_layout); auto output_md = onednn::layout_to_memory_desc(output_layout); - if (prim->global_pooling) { - for (size_t i = 0; i < kernel.size(); i++) - kernel[i] = input_md.dims()[2 + i]; - } - for (size_t i = 0; i < kernel.size(); i++) { pad_r[i] = (output_md.dims()[2 + i] - 1) * stride[i] - input_md.dims()[2 + i] + kernel[i] - pad_l[i]; } diff --git a/src/plugins/intel_gpu/src/graph/include/max_unpooling_inst.h b/src/plugins/intel_gpu/src/graph/include/max_unpooling_inst.h deleted file mode 100644 index 7714386f587..00000000000 --- a/src/plugins/intel_gpu/src/graph/include/max_unpooling_inst.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "intel_gpu/primitives/max_unpooling.hpp" -#include "primitive_inst.h" - -#include -#include - -namespace cldnn { - -template <> -struct typed_program_node : public typed_program_node_base { - using parent = typed_program_node_base; - typed_program_node(const std::shared_ptr prim, program& prog); - -public: - using parent::parent; - program_node& input() const { return get_dependency(0); } - program_node& argmax() const { return get_dependency(1); } -}; - -using max_unpooling_node = typed_program_node; - -template <> -class typed_primitive_inst : public typed_primitive_inst_base { - using parent = typed_primitive_inst_base; - -public: - typed_primitive_inst(network& network, max_unpooling_node const& desc); - static layout calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param); - static std::string to_string(max_unpooling_node const& node); -}; - -using max_unpooling_inst = typed_primitive_inst; - -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/max_unpooling.cpp b/src/plugins/intel_gpu/src/graph/max_unpooling.cpp deleted file mode 100644 index daff673d6ba..00000000000 --- a/src/plugins/intel_gpu/src/graph/max_unpooling.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "max_unpooling_inst.h" -#include "primitive_type_base.h" -#include "sliding_window_utils_legacy.h" -#include "intel_gpu/runtime/error_handler.hpp" -#include "json_object.h" -#include -#include - -namespace cldnn { -primitive_type_id max_unpooling::type_id() { - static primitive_type_base instance; - return &instance; -} - -max_unpooling_node::typed_program_node(const std::shared_ptr prim, program& prog) - : parent(prim, prog) { - can_share_buffer(false); // for max_unpooling initial zero values are significant -} - -layout max_unpooling_inst::calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param) { - assert(static_cast(impl_param.desc->output_data_type) == false && - "Output data type forcing is not supported for max_unpooling_node!"); - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(0); - auto argmax_layout = impl_param.get_input_layout(1); - - CLDNN_ERROR_NOT_EQUAL(desc->id, - "Argmax data type", - static_cast(argmax_layout.data_type), - "expected to be fp32", - static_cast(data_types::f32), - "Argmax data type is not fp32."); - - if (desc->with_output_size) { - tensor output_size(input_layout.batch(), - input_layout.feature(), - desc->output_size.spatial[0], - desc->output_size.spatial[1]); - return {input_layout.data_type, input_layout.format, output_size}; - } - - auto pad = desc->pad; - auto stride = desc->stride; - auto window_size = desc->size; - - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "stride spatial X", - stride.spatial[0], - "", - 0, - "Stride spatial X must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "stride spatial Y", - stride.spatial[1], - "", - 0, - "Stride spatial Y must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "window size spatial X", - window_size.spatial[0], - "", - 0, - "Size X (of pooling window) must be positive (>= 1)"); - CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id, - "window size spatial Y", - window_size.spatial[1], - "", - 0, - "Size Y (of pooling window) must be positive (>= 1)"); - - auto output_range = calc_sliding_window_needed_input_range(input_layout.get_tensor(), - window_size, - pad, - stride, - {1, 1, 1, 1}, - true, - 1); - - tensor output_size(input_layout.batch(), - input_layout.feature(), - output_range.spatial[0], - output_range.spatial[1]); - return {input_layout.data_type, input_layout.format, output_size}; -} - -std::string max_unpooling_inst::to_string(max_unpooling_node const& node) { - auto desc = node.get_primitive(); - auto node_info = node.desc_to_json(); - auto& input = node.input(); - auto& argmax = node.argmax(); - - std::stringstream primitive_description; - - json_composite max_unmax_unpooling_info; - max_unmax_unpooling_info.add("input", input.id()); - max_unmax_unpooling_info.add("argmax", argmax.id()); - - node_info->add("max unmax_unpooling info", max_unmax_unpooling_info); - node_info->dump(primitive_description); - - return primitive_description.str(); -} - -max_unpooling_inst::typed_primitive_inst(network& network, max_unpooling_node const& node) - : parent(network, node) {} - -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/pooling.cpp b/src/plugins/intel_gpu/src/graph/pooling.cpp index 992c4e641b0..b5e1257da1f 100644 --- a/src/plugins/intel_gpu/src/graph/pooling.cpp +++ b/src/plugins/intel_gpu/src/graph/pooling.cpp @@ -22,7 +22,7 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i auto input_layout = impl_param.get_input_layout(); - auto pad = desc->pad; + auto pad = desc->pads_begin; auto stride = desc->stride; auto window_size = desc->size; @@ -46,43 +46,6 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i } } - if (!desc->argmax.empty()) - CLDNN_ERROR_NOT_EQUAL(desc->id, - "Pooling mode", - static_cast(desc->mode), - "should be max_with_argmax", - static_cast(pooling_mode::max_with_argmax), - "Pooling mode should be set to max_with_argmax when argmax primitive is present."); - - if (desc->mode == pooling_mode::max_with_argmax) { - CLDNN_ERROR_NOT_EQUAL(desc->id, - "Argmax primitive", - static_cast(desc->argmax.empty()), - "should not be empty", - static_cast(0), - "Argmax primitive not present despite max_with_argmax mode."); - - auto argmax_layout = impl_param.get_input_layout(1); - CLDNN_ERROR_NOT_EQUAL(desc->id, - "Argmax data type", - static_cast(argmax_layout.data_type), - "expected to be fp32", - static_cast(data_types::f32), - "Argmax data type is not fp32."); - CLDNN_ERROR_NOT_PROPER_FORMAT(desc->id, - "Input_layout.format", - input_layout.format.value, - "argmax_layout.format", - argmax_layout.format); - } - - if (desc->global_pooling) { - window_size = ov::Shape(input_layout.get_spatial_rank(), 1); - for (size_t i = 0; i < input_layout.get_spatial_rank(); i++) { - window_size[i] = input_layout.spatial(input_layout.get_spatial_rank() - i - 1); - } - } - uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1; uint32_t stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1; uint32_t stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1; @@ -190,13 +153,10 @@ std::string pooling_inst::to_string(pooling_node const& node) { std::stringstream primitive_description; - bool is_global = desc->global_pooling; - json_composite pooling_info; pooling_info.add("mode", mode); pooling_info.add("stride", cldnn::to_string(strd)); pooling_info.add("kernel size", cldnn::to_string(kernel_size)); - pooling_info.add("is global", is_global ? "true" : "false"); if (desc->with_output_size) { json_composite ud_out_size_info; ud_out_size_info.add("size", desc->output_size.to_string()); diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 09c12d6f95f..a755ebcbab6 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -327,7 +327,7 @@ bool program::analyze_output_size_handling_need() { auto calc_output_range = calc_sliding_window_output_range( primInputSize, size, - ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()), + ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()), prim->stride, ov::Strides(prim->stride.size(), 1), true, diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/max_unpooling_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/max_unpooling_gpu_ref.cl deleted file mode 100644 index 50107c10a8c..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/max_unpooling_gpu_ref.cl +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/batch_headers/data_types.cl" -#include "include/batch_headers/fetch_data.cl" - -KERNEL(pooling_gpu)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global float* arg_max) -{ -#if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF - const uint x = (uint)get_global_id(0); - const uint y = (uint)get_global_id(1); - const uint bf = (uint)get_global_id(2); - const uint f = bf % INPUT0_FEATURE_NUM; - const uint b = bf / INPUT0_FEATURE_NUM; - - if (x >= INPUT0_SIZE_X) - { - return; - } -#elif OUTPUT_LAYOUT_YXFB - const uint x = (uint)get_global_id(1); - const uint y = (uint)get_global_id(2); - const uint bf = (uint)get_global_id(0); - const uint f = bf / INPUT0_BATCH_NUM; - const uint b = bf % INPUT0_BATCH_NUM; -#endif - - const uint input_id = GET_DATA_INDEX(INPUT0, b, f, y, x); - const uint arg_max_id = GET_DATA_INDEX(INPUT1, b, f, y, x); - const uint pool_idx = convert_uint(arg_max[arg_max_id]); - -#if OUTPUT_PADDED - const uint x_output = pool_idx % OUTPUT_SIZE_X; - const uint y_output = (pool_idx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; - const uint f_output = (pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM; - const uint b_output = pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM; - - const uint output_pos = GET_DATA_INDEX(OUTPUT, b_output, f_output, y_output, x_output); - output[output_pos] += ACTIVATION(input[input_id], ACTIVATION_PARAMS); -#else - output[pool_idx] += ACTIVATION(input[input_id], ACTIVATION_PARAMS); -#endif -} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_bfyx_block_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_bfyx_block_opt.cl index 173e6778bff..69aa217c61c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_bfyx_block_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_bfyx_block_opt.cl @@ -5,7 +5,7 @@ #include "include/batch_headers/data_types.cl" #include "include/batch_headers/fetch_data.cl" -#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING +#if MAX_POOLING #define INIT_VAL ACCUMULATOR_VAL_MIN #elif defined AVG_POOLING #define INIT_VAL ACCUMULATOR_VAL_ZERO @@ -15,7 +15,7 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in) { -#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING +#if MAX_POOLING return ACCUMULATOR_MAX_FUNC(tmp, in); #elif AVG_POOLING return tmp + in; @@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY KERNEL(pooling_gpu)( const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output -#if MAX_WITH_ARGMAX_POOLING - , __global float* arg_max -#endif #if HAS_FUSED_OPS_DECLS , FUSED_OPS_DECLS #endif @@ -49,12 +46,6 @@ KERNEL(pooling_gpu)( ACCUMULATOR_TYPE max_x[BLOCK_SIZE_Y]; ACCUMULATOR_TYPE result[POOL_SIZE_Y]; -#if MAX_WITH_ARGMAX_POOLING - uint arg_max_x[BLOCK_SIZE_Y] = { 0 }; - uint arg_max_result[POOL_SIZE_Y] = { 0 }; - uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b)); -#endif - for(uint i = 0; i < BLOCK_SIZE_Y; i++) { max_x[i] = INIT_VAL; @@ -65,33 +56,15 @@ KERNEL(pooling_gpu)( { for(uint i = 0; i < POOL_SIZE_X; i++) { - -#if MAX_WITH_ARGMAX_POOLING - if(input[input_idx] > max_x[j]) - arg_max_x[j] = input_idx_bfyx_no_padding; -#endif max_x[j] = FUNC_CALL(apply_pooling)(max_x[j], TO_ACCUMULATOR_TYPE(input[input_idx])); input_idx += INPUT0_X_PITCH; - -#if MAX_WITH_ARGMAX_POOLING - input_idx_bfyx_no_padding++; -#endif - } input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH); - -#if MAX_WITH_ARGMAX_POOLING - input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X); -#endif } for(uint i = 0; i < POOL_SIZE_Y; i++) { result[i] = max_x[i * STRIDE_SIZE_Y]; - -#if MAX_WITH_ARGMAX_POOLING - arg_max_result[i] = arg_max_x[i * STRIDE_SIZE_Y]; -#endif } // now we do max in "y" dimension @@ -100,10 +73,6 @@ KERNEL(pooling_gpu)( for(uint j = 1; j < POOL_SIZE_Y; j++) { -#if MAX_WITH_ARGMAX_POOLING - if(max_x[j + i * STRIDE_SIZE_Y] > result[i]) - arg_max_result[i] = arg_max_x[j + i * STRIDE_SIZE_Y]; -#endif result[i] = FUNC_CALL(apply_pooling)(result[i], max_x[j + i * STRIDE_SIZE_Y]); } @@ -111,10 +80,6 @@ KERNEL(pooling_gpu)( uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x); -#if MAX_WITH_ARGMAX_POOLING - uint arg_max_pos = GET_DATA_INDEX(INPUT1, b, f, y, x); -#endif - OUTPUT_TYPE final_result; ACTIVATION_TYPE pool_result; @@ -134,10 +99,6 @@ KERNEL(pooling_gpu)( #endif output[output_pos] = final_result; output_pos += OUTPUT_Y_PITCH; -#if MAX_WITH_ARGMAX_POOLING - arg_max[arg_max_pos] = arg_max_result[i]; - arg_max_pos += INPUT1_Y_PITCH; -#endif } } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_ref.cl index 2a4a6e4c30c..5c58d6117c2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/pooling_gpu_ref.cl @@ -5,7 +5,7 @@ #include "include/batch_headers/data_types.cl" #include "include/batch_headers/fetch_data.cl" -#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING +#if MAX_POOLING #define INIT_VAL ACCUMULATOR_VAL_MIN #elif AVG_POOLING #define INIT_VAL ACCUMULATOR_VAL_ZERO @@ -15,7 +15,7 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in) { -#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING +#if MAX_POOLING return ACCUMULATOR_MAX_FUNC(tmp, in); #elif AVG_POOLING return tmp + in; @@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY KERNEL(pooling_gpu)( const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output -#if MAX_WITH_ARGMAX_POOLING -, __global float* arg_max -#endif #ifdef SELECTED_INDICES_TYPE , __global SELECTED_INDICES_TYPE* indices #endif @@ -98,10 +95,6 @@ KERNEL(pooling_gpu)( uint result_idx = 0; #endif -#if MAX_WITH_ARGMAX_POOLING - uint arg_max_idx = 0; -#endif - #ifdef CHECK_BOUNDARY if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X || offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y || @@ -163,19 +156,6 @@ KERNEL(pooling_gpu)( const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH; #endif #endif - -#if MAX_WITH_ARGMAX_POOLING - if(input[input_idx] > result) - { -#if OUTPUT_DIMS < 5 - const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b)); -#else - const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * - (input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b))); -#endif - arg_max_idx = input_idx_bfyx_no_padding; - } -#endif const ACCUMULATOR_TYPE casted_input = TO_ACCUMULATOR_TYPE(input[input_idx]); #ifdef SELECTED_INDICES_TYPE if (casted_input > result) @@ -220,14 +200,6 @@ KERNEL(pooling_gpu)( uint input_idx = INPUT0_GET_INDEX(b, f, offset_y, offset_x); #endif -#if MAX_WITH_ARGMAX_POOLING - #if OUTPUT_DIMS < 5 - uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b)); - #else - uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (offset_z + INPUT0_SIZE_Z *(f + INPUT0_FEATURE_NUM * b))); - #endif -#endif - #if OUTPUT_DIMS == 5 for(uint l = 0; l < POOL_SIZE_Z; l++) { @@ -236,11 +208,6 @@ KERNEL(pooling_gpu)( { for(uint i = 0; i < POOL_SIZE_X; i++) { -#if MAX_WITH_ARGMAX_POOLING - if(input[input_idx] > result) - arg_max_idx = input_idx_bfyx_no_padding; -#endif - #if OUTPUT_DIMS == 5 #if !INPUT0_SIMPLE uint input_idx = INPUT0_GET_INDEX(b, f, offset_z + l, offset_y + j, offset_x + i); @@ -267,21 +234,11 @@ KERNEL(pooling_gpu)( input_idx += INPUT0_X_PITCH; #endif #endif - -#if MAX_WITH_ARGMAX_POOLING - input_idx_bfyx_no_padding++; -#endif } input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH); -#if MAX_WITH_ARGMAX_POOLING - input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X); -#endif } #if OUTPUT_DIMS == 5 // 3D input_idx += (INPUT0_Z_PITCH - POOL_SIZE_Y*INPUT0_Y_PITCH); -#if MAX_WITH_ARGMAX_POOLING - input_idx_bfyx_no_padding += (INPUT0_SIZE_Y - POOL_SIZE_Y); -#endif } #endif @@ -322,12 +279,6 @@ KERNEL(pooling_gpu)( #endif indices[output_pos] = TO_SELECTED_INDICES_TYPE(result_idx); #endif - -#if MAX_WITH_ARGMAX_POOLING - //INPUT1 macro stands for Argmax - const uint arg_max_pos = GET_DATA_INDEX_5D(INPUT1, b, f, z, y, x); - arg_max[arg_max_pos] = convert_float(arg_max_idx); -#endif } #undef INIT_VAL diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 0a76ed0ed16..bc1cdde5c07 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -38,7 +38,6 @@ enum class KernelType { RESAMPLE, REGION_YOLO, REORG_YOLO, - MAX_UNPOOLING, MVN, LSTM_GEMM, LSTM_ELT, @@ -184,7 +183,6 @@ enum class ActivationFunction { enum class PoolType { MAX, AVG, - MAX_WITH_ARGMAX, BILINEAR, DEFORMABLE_BILINEAR }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 170200261da..37fadd81e4d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -242,7 +242,6 @@ std::string toString(PoolType mode) { switch (mode) { case PoolType::MAX: return "MAX"; case PoolType::AVG: return "AVG"; - case PoolType::MAX_WITH_ARGMAX: return "MAX_WITH_ARGMAX"; case PoolType::BILINEAR: return "BILINEAR"; case PoolType::DEFORMABLE_BILINEAR: return "DEFORMABLE_BILINEAR"; default: return ""; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp index f4c59f6ed29..17a7880ddc2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp @@ -213,9 +213,6 @@ void ParamsKey::EnablePoolType(PoolType t) { case PoolType::AVG: key.restrict.val.dedicated.pooling.avg = 1; break; - case PoolType::MAX_WITH_ARGMAX: - key.restrict.val.dedicated.pooling.max_with_argmax = 1; - break; case PoolType::BILINEAR: key.restrict.val.dedicated.pooling.bilinear = 1; break; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h index f596421174d..10f36064a86 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h @@ -106,7 +106,6 @@ public: uint32_t max : 1; uint32_t avg : 1; uint32_t floor : 1; - uint32_t max_with_argmax : 1; uint32_t ceil : 1; uint32_t bilinear : 1; uint32_t deformable_bilinear : 1; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.cpp deleted file mode 100644 index 330979e5585..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "max_unpooling_kernel_base.h" -#include - -namespace kernel_selector { -bool MaxUnpoolingKernelBase::Validate(const Params& p, const optional_params& o) const { - if (p.GetType() != KernelType::MAX_UNPOOLING || o.GetType() != KernelType::MAX_UNPOOLING) { - return false; - } - - return true; -} - -JitConstants MaxUnpoolingKernelBase::GetJitConstants(const max_unpooling_params& params) const { - JitConstants jit = MakeBaseParamsJitConstants(params); - if (params.outputs[0].PitchesDifferFromLogicalDims()) - jit.AddConstant(MakeJitConstant("OUTPUT_PADDED", 1)); - return jit; -} - -MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const { - const auto& input = params.inputs[0]; - - DispatchData dispatchData; - - if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) { - // Determine global work sizes. - dispatchData.gws[2] = input.Batch().v * input.Feature().v; // B, F - dispatchData.gws[0] = Align(input.X().v, 32); // X - dispatchData.gws[1] = input.Y().v; // Y - - dispatchData.lws[0] = 32; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - } else { - // Determine global work sizes. - dispatchData.gws[0] = input.Batch().v * input.Feature().v; // B, F - dispatchData.gws[1] = input.X().v; // X - dispatchData.gws[2] = input.Y().v; // Y - - dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast(1)), static_cast(32)); - while (dispatchData.gws[0] % dispatchData.lws[0] != 0) { - --dispatchData.lws[0]; - } - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - } - - return dispatchData; -} - -KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params, - const optional_params& options) const { - if (!Validate(params, options)) { - return {}; - } - - const max_unpooling_params& orgParams = static_cast(params); - - DispatchData dispatchData = SetDefault(orgParams); - - KernelData kd = KernelData::Default(params); - - auto cldnn_jit = GetJitConstants(orgParams); - auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options); - auto jit = CreateJit(kernelName, cldnn_jit, entry_point); - - auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); - - return {kd}; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.h deleted file mode 100644 index 0aaac7dca4a..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_base.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "kernel_base_opencl.h" -#include "kernel_selector_params.h" - -namespace kernel_selector { -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// max_unpooling_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct max_unpooling_params : public base_params { - max_unpooling_params() : base_params(KernelType::MAX_UNPOOLING) {} -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// max_unpooling_optional_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct max_unpooling_optional_params : optional_params { - max_unpooling_optional_params() : optional_params(KernelType::MAX_UNPOOLING) {} -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// MaxUnpoolingKernelBase -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class MaxUnpoolingKernelBase : public KernelBaseOpenCL { -public: - using KernelBaseOpenCL::KernelBaseOpenCL; - virtual ~MaxUnpoolingKernelBase() {} - - struct DispatchData : public CommonDispatchData { - bool needsBoundary = false; - }; - -protected: - bool Validate(const Params&, const optional_params&) const override; - virtual JitConstants GetJitConstants(const max_unpooling_params& params) const; - virtual DispatchData SetDefault(const max_unpooling_params& params) const; - KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.cpp deleted file mode 100644 index beb0fddc196..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "max_unpooling_kernel_gpu_ref.h" - -namespace kernel_selector { -ParamsKey MaxUnpoolingKernelGPURef::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::F16); - k.EnableInputDataType(Datatype::F32); - k.EnableInputDataType(Datatype::INT8); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::INT8); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableInputLayout(DataLayout::yxfb); - k.EnableInputLayout(DataLayout::byxf); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::yxfb); - k.EnableOutputLayout(DataLayout::byxf); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBatching(); - k.EnableDifferentTypes(); - return k; -} - -KernelsData MaxUnpoolingKernelGPURef::GetKernelsData(const Params& params, const optional_params& options) const { - return GetCommonKernelsData(params, options); -} - -KernelsPriority MaxUnpoolingKernelGPURef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_9; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h deleted file mode 100644 index c63172b5a50..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "max_unpooling_kernel_base.h" - -namespace kernel_selector { -class MaxUnpoolingKernelGPURef : public MaxUnpoolingKernelBase { -public: - MaxUnpoolingKernelGPURef() : MaxUnpoolingKernelBase("max_unpooling_gpu_ref") {} - virtual ~MaxUnpoolingKernelGPURef() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.cpp deleted file mode 100644 index fe240e818b7..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "max_unpooling_kernel_selector.h" -#include "max_unpooling_kernel_gpu_ref.h" - -namespace kernel_selector { - -max_unpooling_kernel_selector::max_unpooling_kernel_selector() { Attach(); } - -KernelsData max_unpooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { - return GetNaiveBestKernel(params, options, KernelType::MAX_UNPOOLING); -} -} // namespace kernel_selector \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.h deleted file mode 100644 index e392d6530db..00000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/max_unpooling/max_unpooling_kernel_selector.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "kernel_selector.h" - -namespace kernel_selector { -class max_unpooling_kernel_selector : public kernel_selector_base { -public: - static max_unpooling_kernel_selector& Instance() { - static max_unpooling_kernel_selector instance_; - return instance_; - } - - max_unpooling_kernel_selector(); - - virtual ~max_unpooling_kernel_selector() {} - - KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; -}; -} // namespace kernel_selector \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_base.cpp index d90845df640..61239335085 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_base.cpp @@ -203,8 +203,6 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params, FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); uint32_t param_idx = 1; - if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX) - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++}); if (orgParams.maxPoolOpset8Features) { kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++}); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp index 41d4579cd20..1c2e6009316 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv16.cpp @@ -20,7 +20,6 @@ ParamsKey PoolingKernel_b_fs_yx_fsv16::GetSupportedKey() const { k.EnableBatching(); k.EnablePoolType(PoolType::MAX); k.EnablePoolType(PoolType::AVG); - k.EnablePoolType(PoolType::MAX_WITH_ARGMAX); k.EnablePoolRemainder(PoolRemainder::FLOOR); k.EnablePoolRemainder(PoolRemainder::CEIL); k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp index 0890a128448..68a8d8d04ac 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.cpp @@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPUBfyxBlockOpt::GetSupportedKey() const { k.EnableBatching(); k.EnablePoolType(PoolType::MAX); k.EnablePoolType(PoolType::AVG); - k.EnablePoolType(PoolType::MAX_WITH_ARGMAX); k.EnablePoolRemainder(PoolRemainder::FLOOR); k.EnablePoolRemainder(PoolRemainder::CEIL); k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp index f8d52984c6f..7640aff38fd 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_int8_ref.cpp @@ -90,8 +90,8 @@ bool PoolingKernelGPUInt8Ref::Validate(const Params& params, const optional_para if (p.inputs[0].GetDType() == Datatype::INT8 || p.inputs[0].GetDType() == Datatype::UINT8) { // Max pooling doesn't change quantization ranges, so output data type should be the same as input - if ((p.poolType == PoolType::MAX || p.poolType == PoolType::MAX_WITH_ARGMAX) - && (p.outputs[0].GetDType() != p.inputs[0].GetDType()) && p.quantization == QuantizationType::NONE) + if (p.poolType == PoolType::MAX && p.outputs[0].GetDType() != p.inputs[0].GetDType() && + p.quantization == QuantizationType::NONE) return false; // Average pooling should produce FP by default. (u)int8 is possible when quantize op is fused. // if (p.poolType == PoolType::AVG && diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp index f2b5640754f..f6321381438 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/pooling/pooling_kernel_gpu_ref.cpp @@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const { k.EnableBatching(); k.EnablePoolType(PoolType::MAX); k.EnablePoolType(PoolType::AVG); - k.EnablePoolType(PoolType::MAX_WITH_ARGMAX); k.EnablePoolRemainder(PoolRemainder::FLOOR); k.EnablePoolRemainder(PoolRemainder::CEIL); k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED); diff --git a/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp b/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp index 556ace5ea0b..8775ffe4187 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/pooling.cpp @@ -36,9 +36,9 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptrget_output_shape(0)), cldnn::element_type_to_data_type(op->get_output_element_type(0))); - poolPrim.pad_end = pads_end; p.add_primitive(*op, poolPrim); } @@ -64,9 +64,9 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptrget_output_shape(0)), cldnn::element_type_to_data_type(op->get_output_element_type(0))); - poolPrim.pad_end = pads_end; p.add_primitive(*op, poolPrim); } diff --git a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp index 64bc3a61556..b9754dce887 100644 --- a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp @@ -148,11 +148,12 @@ TEST_P(pooling_f32_activation, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 3); ov::Strides stride(r, 1); - ov::Shape pad(r, 1); + ov::Shape pads_begin(r, 1); + ov::Shape pads_end(r, 1); create_topologies( input_layout("input", get_input_layout(p)), - pooling("pooling", "input", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), activation("act", "pooling", activation_func::relu), reorder("output_reorder", "act", format::bfyx, data_types::f32) ); @@ -185,12 +186,13 @@ TEST_P(pooling_f32_scale, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 3); ov::Strides stride(r, 1); - ov::Shape pad(r, 1); + ov::Shape pads_begin(r, 1); + ov::Shape pads_end(r, 1); create_topologies( input_layout("input", get_input_layout(p)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)), - pooling("pooling", "input", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), reorder("output_reorder", "scale", format::bfyx, data_types::f32) ); @@ -205,12 +207,13 @@ TEST_P(pooling_f32_scale, fp16_scale_out) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 3); ov::Strides stride(r, 1); - ov::Shape pad(r, 1); + ov::Shape pads_begin(r, 1); + ov::Shape pads_end(r, 1); create_topologies( input_layout("input", get_input_layout(p)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)), - pooling("pooling", "input", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, data_types::f16), reorder("output_reorder", "scale", format::bfyx, data_types::f32) ); @@ -241,7 +244,8 @@ TEST_P(pooling_scale_activation_quantize, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 4); ov::Strides stride(r, 2); - ov::Shape pad(r, 0); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), @@ -250,7 +254,7 @@ TEST_P(pooling_scale_activation_quantize, basic) { data("out_lo", get_mem(get_single_element_layout(p), 0)), data("out_hi", get_mem(get_single_element_layout(p), 255)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)), - pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), activation("activation", "scale", activation_func::relu), quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8), @@ -267,7 +271,8 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 4); ov::Strides stride(r, 2); - ov::Shape pad(r, 0); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), @@ -276,7 +281,7 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) { data("out_lo", get_mem(get_single_element_layout(p), -127, 127)), data("out_hi", get_mem(get_single_element_layout(p), -127, 127)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)), - pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), activation("activation", "scale", activation_func::relu), quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8), @@ -293,7 +298,8 @@ TEST_P(pooling_scale_activation_quantize, per_channel) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 4); ov::Strides stride(r, 2); - ov::Shape pad(r, 0); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), @@ -302,7 +308,7 @@ TEST_P(pooling_scale_activation_quantize, per_channel) { data("out_lo", get_mem(get_single_element_layout(p), 0)), data("out_hi", get_mem(get_single_element_layout(p), 255)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)), - pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), activation("activation", "scale", activation_func::hyperbolic_tan), quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8), @@ -362,12 +368,13 @@ TEST_P(pooling_scale_activation, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 4); ov::Strides stride(r, 2); - ov::Shape pad(r, 0); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)), - pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), activation("activation", "scale", activation_func::relu), reorder("output_reorder", "activation", p.default_format, data_types::f32) @@ -383,12 +390,13 @@ TEST_P(pooling_scale_activation, eltwise_mul) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 4); ov::Strides stride(r, 2); - ov::Shape pad(r, 0); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), data("scale_data", get_mem(get_per_channel_layout(p))), - pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type), activation("activation", "scale", activation_func::relu), reorder("output_reorder", "activation", p.default_format, data_types::f32) @@ -577,11 +585,12 @@ TEST_P(pooling_onednn_activation1, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 3); ov::Strides stride(r, 1); - ov::Shape pad(r, 1); + ov::Shape pads_begin(r, 1); + ov::Shape pads_end(r, 1); create_topologies( input_layout("input", get_input_layout(p)), - pooling("pooling", "input", p.pool_mode, kernel, stride, pad), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), activation("act", "pooling", activation_func::relu), reorder("output_reorder", "act", format::bfyx, data_types::f32) ); @@ -597,10 +606,12 @@ TEST_P(pooling_onednn_activation2, basic) { auto r = get_input_layout(p).get_spatial_rank(); ov::Shape kernel(r, 3); ov::Strides stride(r, 1); + ov::Shape pads_begin(r, 0); + ov::Shape pads_end(r, 0); create_topologies( input_layout("input", get_input_layout(p)), - pooling("pooling", "input", p.pool_mode, kernel, stride), + pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end), activation("act", "pooling", activation_func::relu), reorder("output_reorder", "act", format::bfyx, data_types::f32) ); diff --git a/src/plugins/intel_gpu/tests/test_cases/max_unpooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/max_unpooling_gpu_test.cpp deleted file mode 100644 index 48ef4ba3613..00000000000 --- a/src/plugins/intel_gpu/tests/test_cases/max_unpooling_gpu_test.cpp +++ /dev/null @@ -1,442 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "test_utils.h" - -#include -#include -#include -#include -#include -#include - -using namespace cldnn; -using namespace ::tests; - -TEST(max_unpooling_gpu, basic_in2x3x2x2) { - // Input : 2x2x2x1 - // Argmax : 2x2x2x1 - // Output : 2x2x3x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Input: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - // - // Output: - // f0: b0: 0 0 0 b1: 0 0 0 - // f0: b0: 0 4 0 b1: 0.5 0 0 - // f1: b0: 0 0 0 b1: 0 0 0 - // f1: b0: 0 8 16 b1: 12 0 17 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }); - - set_values(arg_max, { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(data("arg_max", arg_max)); - topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("max_unpooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 2); - EXPECT_EQ(output_layout.spatial(0), 3); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_output_vec = { - 0.f, 0.f, 0.f, - 0.f, 4.f, 0.f, - 0.f, 0.f, 0.f, - 0.f, 8.f, 16.f, - 0.f, 0.f, 0.f, - 0.5f, 0.f, 0.f, - 0.f, 0.f, 0.f, - 12.f, 0.f, 17.f, - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - } -} - -TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) { - // Input : 2x2x2x1 - // Argmax : 2x2x2x1 - // Output : 2x2x3x2 - // Output Padding : 0x0x1x1 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Input: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - // - // Output: - // f0: b0: 0 0 0 b1: 0 0 0 - // f0: b0: 0 4 0 b1: 0.5 0 0 - // f1: b0: 0 0 0 b1: 0 0 0 - // f1: b0: 0 8 16 b1: 12 0 17 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }); - - set_values(arg_max, { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(data("arg_max", arg_max)); - topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0))); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("max_unpooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 2); - EXPECT_EQ(output_layout.spatial(0), 3); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_output_vec = { - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 4.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 8.f, 16.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.5f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 12.f, 0.f, 17.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, - }; - std::vector out; - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - out.push_back(output_ptr[i]); - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - } -} - -TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) { - // Input : 2x2x2x1 - // Argmax : 2x2x2x1 - // Output : 2x2x3x2 - // Output size explicitly provided - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Input: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - // - // Output: - // f0: b0: 0 0 0 b1: 0 0 0 - // f0: b0: 0 4 0 b1: 0.5 0 0 - // f1: b0: 0 0 0 b1: 0 0 0 - // f1: b0: 0 8 16 b1: 12 0 17 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }); - - set_values(arg_max, { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(data("arg_max", arg_max)); - topology.add(max_unpooling("max_unpooling", "input", "arg_max", {2, 2, 3, 2})); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("max_unpooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 2); - EXPECT_EQ(output_layout.spatial(0), 3); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_output_vec = { - 0.f, 0.f, 0.f, - 0.f, 4.f, 0.f, - 0.f, 0.f, 0.f, - 0.f, 8.f, 16.f, - 0.f, 0.f, 0.f, - 0.5f, 0.f, 0.f, - 0.f, 0.f, 0.f, - 12.f, 0.f, 17.f, - }; - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - } -} - -TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) { - // Input : 2x2x2x1 - // Argmax : 2x2x2x1 - // Output : 2x2x3x2 - // Input values in fp16 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Input: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - // - // Output: - // f0: b0: 0 0 0 b1: 0 0 0 - // f0: b0: 0 4 0 b1: 0.5 0 0 - // f1: b0: 0 0 0 b1: 0 0 0 - // f1: b0: 0 8 16 b1: 12 0 17 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - FLOAT16(4.0f), FLOAT16(4.0f), - FLOAT16(8.0f), FLOAT16(16.0f), - FLOAT16(0.5f), FLOAT16(0.0f), - FLOAT16(12.0f), FLOAT16(17.0f) - }); - - set_values(arg_max, { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(data("arg_max", arg_max)); - topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("max_unpooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 2); - EXPECT_EQ(output_layout.spatial(0), 3); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_output_vec = { - 0.f, 0.f, 0.f, - 0.f, 4.f, 0.f, - 0.f, 0.f, 0.f, - 0.f, 8.f, 16.f, - 0.f, 0.f, 0.f, - 0.5f, 0.f, 0.f, - 0.f, 0.f, 0.f, - 12.f, 0.f, 17.f, - }; - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i])); - } -} - -TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) { - // Input : 2x2x3x2 - // Argmax : 2x2x2x1 - // Output : 2x2x2x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Forward Max Pooling Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - // - // Unpooling output: - // f0: b0: 0 0 0 b1: 0 0 0 - // f0: b0: 0 4 0 b1: 0.5 0 0 - // f1: b0: 0 0 0 b1: 0 0 0 - // f1: b0: 0 8 16 b1: 12 0 17 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling_max_with_argmax", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 })); - topology.add(max_unpooling("max_unpooling", "pooling_max_with_argmax", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("max_unpooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 2); - EXPECT_EQ(output_layout.spatial(0), 3); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 0.f, 0.f, 0.f, - 0.f, 4.f, 0.f, - 0.f, 0.f, 0.f, - 0.f, 8.f, 16.f, - 0.f, 0.f, 0.f, - 0.5f, 0.f, 0.f, - 0.f, 0.f, 0.f, - 12.f, 0.f, 17.f, - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - } - - for (size_t i = 0; i < expected_argmax_vec.size(); ++i) { - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} diff --git a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp index ecf3fc61297..e1704086622 100644 --- a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp @@ -125,18 +125,12 @@ VVVF::type> reference_pooling(const V int stride_z, int offset_x, int offset_y, - int offset_z, - bool global_pooling) { + int offset_z) { using output_t = typename pooling_mode_output::type; VVVF result; auto size_x = input[0][0].size(); auto size_y = input[0].size(); auto size_z = input.size(); - if (global_pooling) { - pool_z = size_z; - pool_y = size_y; - pool_x = size_x; - } auto accumulator = pooling_accumulator(); @@ -279,141 +273,6 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) { EXPECT_EQ(2.0f, output_ptr[0]); } -TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) { - // Brief test description. - // - // Pool mode: max - // Global pooling: true - // Padding: none - // - // Input data: - // [-0.5, 1.0, 0.5] - // [ 2.0, 1.5, -0.5] - // [ 0.0, -1.0, 0.5] - // - // Expected output: - // [ 2.0] - - auto& engine = get_test_engine(); - - auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 3, 3 } }); - - topology topology; - topology.add(input_layout("input_prim", input_prim->get_layout())); - topology.add(pooling("pool_prim", "input_prim", pooling_mode::max)); - - network network(engine, topology); - set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f }); - network.set_input_data("input_prim", input_prim); - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "pool_prim"); - - auto output_prim = outputs.begin()->second.get_memory(); - - cldnn::mem_lock output_ptr (output_prim, get_test_stream()); - - EXPECT_EQ(2.0f, output_ptr[0]); -} - -TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { - // Brief test description. - // - // Pool mode: max - // Global pooling: true - // Padding: none - - auto& engine = get_test_engine(); - - auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); - - topology topology; - topology.add(input_layout("input_prim", input_prim->get_layout())); - topology.add(pooling("pool_prim", "input_prim", pooling_mode::max)); - - network network(engine, topology); - std::vector vals = { - 0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3, - -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16, - 8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21, - -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6, - 6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9, - 6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8, - 3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1, - 6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13, - -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6, - }; - set_values(input_prim, vals); - network.set_input_data("input_prim", input_prim); - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "pool_prim"); - - auto output_prim = outputs.begin()->second.get_memory(); - - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - - std::vector answers = { 8, 44, 8, 81, 64, 8, 12, 66, 14, 8, 99, 64, 8, 11, 18, 21 }; - - ASSERT_EQ(answers.size(), output_ptr.size()); - for (size_t i = 0; i < output_ptr.size(); ++i) { - EXPECT_EQ(answers[i], output_ptr[i]); - } -} - -TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { - // Brief test description. - // - // Pool mode: avg - // Global pooling: true - // Padding: none - - auto& engine = get_test_engine(); - - auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); - - topology topology; - topology.add(input_layout("input_prim", input_prim->get_layout())); - topology.add(pooling("pool_prim", "input_prim", pooling_mode::average)); - - network network(engine, topology); - std::vector vals = { - 0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3, - -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16, - 8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21, - -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6, - 6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9, - 6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8, - 3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1, - 6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13, - -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6, - }; - set_values(input_prim, vals); - network.set_input_data("input_prim", input_prim); - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "pool_prim"); - - auto output_prim = outputs.begin()->second.get_memory(); - - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - - std::vector answers = { - 29, 199, 241, 63, 85, 85, 213, 64, 85, 85, 21, 64, 142, 227, 8, 65, - 57, 142, 19, 65, 171, 170, 170, 62, 57, 142, 35, 64, 0, 0, 32, 65, - 199, 113, 28, 64, 29, 199, 241, 63, 29, 199, 153, 65, 57, 142, 83, 65, - 228, 56, 14, 63, 142, 227, 120, 64, 171, 170, 170, 63, 85, 85, 181, 64, - }; - - ASSERT_EQ(answers.size(), output_ptr.size()); - for (size_t i = 0; i < output_ptr.size(); ++i) { - EXPECT_EQ(answers[i], output_ptr[i]) << i; - } -} - TEST(pooling_forward_gpu, basic_max_pooling_int8) { auto& engine = get_test_engine(); @@ -966,7 +825,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); - topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0})); + topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0})); network network(engine, topology); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); @@ -1027,7 +886,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); - topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0})); + topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0})); network network(engine, topology); @@ -1098,7 +957,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 }))); - topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0})); + topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0})); network network(engine, topology); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); @@ -1161,7 +1020,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); - topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0})); + topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0})); network network(engine, topology); @@ -1232,7 +1091,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); - topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 })); + topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 })); network network(engine, topology); set_values(input_prim, { @@ -1300,7 +1159,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); - topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0})); + topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0})); network network(engine, topology); @@ -1337,407 +1196,6 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou } } -TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) { - // Input : 2x2x3x2 - // Argmax : 2x2x2x1 - // Output : 2x2x2x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("pooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 1); - EXPECT_EQ(output_layout.spatial(0), 2); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} - -TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) { - // Input : 2x2x3x2x1 - // Argmax : 2x2x2x1x1 - // Output : 2x2x2x2x1 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 3, 2, 1 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 2, 1, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 2, 2 }, { 1, 1, 1 }, {0, 0, 0})); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("pooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfzyx); - EXPECT_EQ(output_layout.spatial(2), 1); - EXPECT_EQ(output_layout.spatial(1), 1); - EXPECT_EQ(output_layout.spatial(0), 2); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} - -TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) { - // Input : 2x2x3x2 - // Argmax : 2x2x2x1 - // Output : 2x2x2x2 - // Input Padding : 2x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("pooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 1); - EXPECT_EQ(output_layout.spatial(0), 2); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} - -TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) { - // Input : 2x2x3x2 - // Argmax : 2x2x2x1 - // Output : 2x2x2x2 - // Output Padding : 2x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, padding({ 0, 0, 1, 1 }, 0))); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("pooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 1); - EXPECT_EQ(output_layout.spatial(0), 2); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 4.0f, 4.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 8.0f, 16.0f,0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 12.0f, 17.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - } - - for (size_t i = 0; i < expected_argmax_vec.size(); ++i) { - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} - -TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) { - // Input : 2x2x3x2 - // Argmax : 2x2x2x1 - // Output : 2x2x2x2 - - // Forward Max Pooling Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 16 b1: 12 9 17 - // - // Output: - // f0: b0: 4 4 b1: 0.5 0 - // f1: b0: 8 16 b1: 12 17 - // - // Argmax: - // f0: b0: 4 4 b1: 15 13 - // f1: b0: 10 11 b1: 21 23 - - auto& engine = get_test_engine(); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - - set_values(input, { - 1.0f, 2.0f, -10.f, - 3.0f, 4.0f, -14.f, - 5.0f, 6.0f, -12.f, - 7.0f, 8.0f, 16.0f, - 0.f, 0.f, -11.f, - 0.5f, -0.5f, -15.f, - 1.5f, 5.2f, -13.f, - 12.f, 9.f, 17.f - }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(mutable_data("arg_max", arg_max)); - topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, { 2, 2, 2, 1 })); - - network network(engine, topology); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("pooling").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - auto output_layout = output->get_layout(); - cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); - - EXPECT_EQ(output_layout.format, format::bfyx); - EXPECT_EQ(output_layout.spatial(1), 1); - EXPECT_EQ(output_layout.spatial(0), 2); - EXPECT_EQ(output_layout.feature(), 2); - EXPECT_EQ(output_layout.batch(), 2); - - std::vector expected_argmax_vec = { - 4.0f, 4.0f, - 10.0f, 11.0f, - 15.0f, 13.0f, - 21.0f, 23.0f - }; - - std::vector expected_output_vec = { - 4.0f, 4.0f, - 8.0f, 16.0f, - 0.5f, 0.0f, - 12.0f, 17.0f - }; - - for (size_t i = 0; i < expected_output_vec.size(); ++i) { - EXPECT_EQ(expected_output_vec[i], output_ptr[i]); - EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); - } -} - template static void generic_average_wo_padding_test(format fmt, tensor output, tensor input, ov::Shape window, ov::Strides stride, ov::Shape offset) { @@ -2259,7 +1717,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); - topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0})); + topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0})); topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 }))); network network(engine, topology); @@ -2332,7 +1790,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x topology topology; topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ { 0, 0, 2, 1 } , 0 }))); - topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0})); + topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0})); topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 }))); network network(engine, topology); @@ -2409,7 +1867,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 topology golden_topology; golden_topology.add(input_layout("input", input_prim->get_layout())); golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 }))); - golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 })); + golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 })); network golden_network(engine, golden_topology); golden_network.set_input_data("input", input_prim); @@ -2426,7 +1884,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 topology golden_topology; golden_topology.add(input_layout("input", input_prim->get_layout())); golden_topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0, 0, x_in_pad, y_in_pad}, 0 }))); - golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 })); + golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 })); golden_topology.add(reorder("reorder_pooling", "fsv32_pooling", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 }))); network fsv32_network(engine, golden_topology); @@ -2468,15 +1926,8 @@ public: stride.insert(stride.begin(), stride_z()); pad.insert(pad.begin(), offset_z()); } - if (global_pooling()) - topo.add(pooling("pool", "input", pool_mode())); - else - topo.add(pooling("pool", - "input", - pool_mode(), - kernel, - stride, - pad)); + + topo.add(pooling("pool", "input", pool_mode(), kernel, stride, pad)); return topo; } @@ -2579,7 +2030,6 @@ public: size_t offset_x() { return _offset_x; } size_t offset_y() { return _offset_y; } size_t offset_z() { return _offset_z; } - bool global_pooling() { return _global_pooling; } void set_input(format::type input_fmt, VVVVVF input_data) { _input_fmt = input_fmt; @@ -2604,16 +2054,11 @@ public: _offset_z = z; } - void set_global_pooling(bool global_pooling) { - _global_pooling = global_pooling; - } - VVVVVF _input; format::type _input_fmt; size_t _pool_x, _pool_y, _pool_z; int _stride_x, _stride_y, _stride_z; int _offset_x, _offset_y, _offset_z; - bool _global_pooling; }; using pooling_random_test_params = std::tuple< @@ -2623,8 +2068,7 @@ using pooling_random_test_params = std::tuple< std::tuple, // pool x, y, z std::tuple, // stride x, y, z std::tuple, // offset x, y, z - format::type, // input format - bool // global pooling + format::type // input format >; template @@ -2647,8 +2091,7 @@ public: this->stride_z(), this->offset_x(), this->offset_y(), - this->offset_z(), - this->global_pooling()); + this->offset_z()); } } return reference; @@ -2658,7 +2101,6 @@ public: size_t b, f, in_x, in_y, in_z, p_x, p_y, p_z; int s_x, s_y, s_z, o_x, o_y, o_z; format::type in_fmt; - bool global_pooling; std::forward_as_tuple( b, @@ -2667,23 +2109,15 @@ public: std::forward_as_tuple(p_x, p_y, p_z), std::forward_as_tuple(s_x, s_y, s_z), std::forward_as_tuple(o_x, o_y, o_z), - in_fmt, - global_pooling + in_fmt ) = params; auto input_data = generate_random_5d(b, f, in_z, in_y, in_x, -256, 256); this->set_input(in_fmt, std::move(input_data)); - if (global_pooling) { - this->set_pool_size(0, 0, 0); - this->set_strides(1, 1, 1); - this->set_offsets(0, 0, 0); - } else { - this->set_pool_size(p_x, p_y, p_z); - this->set_strides(s_x, s_y, s_z); - this->set_offsets(o_x, o_y, o_z); - } - this->set_global_pooling(global_pooling); + this->set_pool_size(p_x, p_y, p_z); + this->set_strides(s_x, s_y, s_z); + this->set_offsets(o_x, o_y, o_z); } void run_random(const pooling_random_test_params& params) { @@ -2733,8 +2167,7 @@ INSTANTIATE_TEST_SUITE_P( format::bfyx, format::b_fs_yx_fsv4, format::b_fs_yx_fsv16, - format::b_fs_yx_fsv32), - testing::Values(false, true)), + format::b_fs_yx_fsv32)), testing::internal::DefaultParamName); INSTANTIATE_TEST_SUITE_P( @@ -2747,8 +2180,7 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(std::tuple(2, 2, 2)), testing::Values(std::tuple(0, 0, 0)), testing::Values(format::bfzyx, - format::b_fs_zyx_fsv16), - testing::Values(false, true)), + format::b_fs_zyx_fsv16)), testing::internal::DefaultParamName); INSTANTIATE_TEST_SUITE_P( @@ -2761,9 +2193,7 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(std::tuple(1, 1, 1), std::tuple(3, 3, 1)), testing::Values(std::tuple(1, 1, 1)), testing::Values(std::tuple(0, 0, 0)), - testing::Values(format::bs_fs_yx_bsv16_fsv16), - testing::Values(false, true) - ), + testing::Values(format::bs_fs_yx_bsv16_fsv16)), testing::internal::DefaultParamName); template @@ -2851,8 +2281,7 @@ INSTANTIATE_TEST_SUITE_P( format::b_fs_yx_fsv16, format::fs_b_yx_fsv32, format::b_fs_yx_fsv32, - format::b_fs_yx_fsv4), - testing::Values(false)), + format::b_fs_yx_fsv4)), testing::internal::DefaultParamName); TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) @@ -3485,10 +2914,10 @@ public: all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride)); // Output padding - all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } })); + all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } })); // Input + output padding - all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } })); + all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } })); } } } @@ -3545,8 +2974,8 @@ public: int height = generic_params->input_layouts[0].spatial(1); int width = generic_params->input_layouts[0].spatial(0); - auto pad_height = pooling->pad[0]; - auto pad_width = pooling->pad[1]; + auto pad_height = pooling->pads_begin[0]; + auto pad_width = pooling->pads_begin[1]; auto kernel_height = pooling->size[0]; auto kernel_width = pooling->size[1]; @@ -3581,8 +3010,8 @@ public: cldnn::pooling_mode pooling_mode = pooling->mode; - int pad_width = pooling->pad[1]; - int pad_height = pooling->pad[0]; + int pad_width = pooling->pads_begin[1]; + int pad_height = pooling->pads_begin[0]; int kernel_width = pooling->size[1]; int kernel_height = pooling->size[0]; diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/test_utils/test_utils.h index 89713cb2320..c88a1c970aa 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.h @@ -552,7 +552,8 @@ inline void PrintTupleTo(const std::tuple, std::sha auto pooling = std::static_pointer_cast(primitive); std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? "max" : "average"; str << "Pooling mode: " << pooling_mode - << " Pad x: " << pooling->pad[1] << " Pad y: " << pooling->pad[0] + << " Pads_begin x: " << pooling->pads_begin[1] << " Pads_begin y: " << pooling->pads_begin[0] + << " Pads_end x: " << pooling->pads_end[1] << " Pads_end y: " << pooling->pads_end[0] << " Stride x: " << pooling->stride[1] << " Stride y: " << pooling->stride[0] << " Size x: " << pooling->size[1] << " Size y: " << pooling->size[0]; } else {