[GPU] Pooling refactoring (#13664)

Roman Lyamin 2022-11-03 14:53:32 +04:00 committed by GitHub
parent c11389fa18
commit 2b5dd4fac8
37 changed files with 110 additions and 1948 deletions

View File

@@ -1,82 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs "max_unpooling" operation.
/// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored.
struct max_unpooling : public primitive_base<max_unpooling> {
CLDNN_DECLARE_PRIMITIVE(max_unpooling)
/// @brief Constructs max_unpooling primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param argmax Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// Used only for output size computation.
/// @param size Pooling kernel size. Used only for output size computation.
/// @param pad Defines logical pad value added to input tensor. Used only for output size computation.
max_unpooling(const primitive_id& id,
const primitive_id& input,
const primitive_id& argmax,
const tensor& size,
const tensor& stride,
const tensor& pad = {0, 0, 0, 0},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(argmax),
pad(pad),
stride(stride),
size(size),
with_output_size(false) {}
/// @brief Constructs max_unpooling primitive (with provided output size)
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param argmax Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
/// @param output_size User-defined output data size of the primitive (w/o padding).
max_unpooling(const primitive_id& id,
const primitive_id& input,
const primitive_id& argmax,
tensor output_size,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(argmax),
with_output_size(true),
output_size(output_size) {}
/// @brief Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
primitive_id argmax;
/// @brief Defines logical pad value added to input tensor.
tensor pad;
/// @brief Defines shift in input buffer between adjacent calculations of output values. Used only for output size computation.
tensor stride;
/// @brief Pooling kernel size. Used only for output size computation.
tensor size;
/// @brief Indicates that the primitive has user-defined output size (non-zero value). Used only for output size computation.
bool with_output_size;
/// @brief User-defined output data size of the primitive (w/o padding).
tensor output_size;
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {argmax}; }
};
/// @}
/// @}
/// @}
} // namespace cldnn
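For reference, the removed primitive was driven by the argmax buffer produced by a paired max pooling. A minimal usage sketch, condensed from the max_unpooling unit test near the end of this diff (IDs, shapes and the engine/memory setup are illustrative):
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("arg_max", arg_max)); // fp32, flattened bfyx indices, no padding
topology.add(max_unpooling("max_unpooling", "input", "arg_max",
    {1, 1, 2, 2},    // pooling kernel size
    {1, 1, 1, 1},    // stride
    {0, 0, 0, 0}));  // pad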

View File

@@ -26,8 +26,6 @@ enum class pooling_mode : int32_t {
average,
/// @brief Average-pooling method without values which are outside of the input.
average_no_padding,
/// @brief Maximum-pooling method with additional buffer to store argmax indices.
max_with_argmax,
/// @brief Pooling with bilinear interpolation.
bilinear,
/// @brief Deformable pooling with bilinear interpolation.
@@ -51,46 +49,18 @@ struct pooling : public primitive_base<pooling> {
pooling_mode mode,
const ov::Shape& size,
const ov::Strides& stride,
const ov::Shape& pad = {0, 0},
const ov::Shape& pads_begin = {0, 0},
const ov::Shape& pads_end = {0, 0},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(""),
mode(static_cast<pooling_mode>(mode)),
global_pooling(false),
pad(pad),
pads_begin(pads_begin),
pads_end(pads_end),
stride(stride),
size(size),
with_output_size(false),
pad_end(size.size(), 0) {}
with_output_size(false) {}
/// @brief Constructs pooling primitive with argmax.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param argmax Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
/// @param mode Pooling mode.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param size Pooling kernel size.
/// @param pad Defines logical pad value added to input tensor
pooling(const primitive_id& id,
const primitive_id& input,
const primitive_id& argmax,
pooling_mode mode,
const ov::Shape& size,
const ov::Strides& stride,
const ov::Shape& pad = {0, 0},
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(argmax),
mode(static_cast<pooling_mode>(mode)),
global_pooling(false),
pad(pad),
stride(stride),
size(size),
with_output_size(false),
pad_end(size.size(), 0) {}
/// @brief Constructs pooling primitive (computes input paddings to match output size).
/// @brief Constructs pooling primitive with known output shape.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mode Pooling mode.
@@ -103,68 +73,19 @@ struct pooling : public primitive_base<pooling> {
pooling_mode mode,
const ov::Shape& size,
const ov::Strides& stride,
const ov::Shape& pad,
const ov::Shape& pads_begin,
const ov::Shape& pads_end,
tensor output_size,
const data_types output_data_type,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
argmax(""),
mode(static_cast<pooling_mode>(mode)),
global_pooling(false),
pad(pad),
pads_begin(pads_begin),
pads_end(pads_end),
stride(stride),
size(size),
with_output_size(true),
output_size(output_size),
pad_end(size.size(), 0) {}
/// @brief Constructs pooling primitive with argmax (computes input paddings to match output size).
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param argmax Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
/// @param mode Pooling mode.
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
/// @param size Pooling kernel size.
/// @param pad Defines logical pad value added to input tensor.
/// @param output_size User-defined output data size of the primitive (w/o padding).
pooling(const primitive_id& id,
const primitive_id& input,
const primitive_id& argmax,
pooling_mode mode,
const ov::Shape& size,
const ov::Strides& stride,
const ov::Shape& pad,
tensor output_size,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(argmax),
mode(static_cast<pooling_mode>(mode)),
global_pooling(false),
pad(pad),
stride(stride),
size(size),
with_output_size(true),
output_size(output_size),
pad_end(size.size(), 0) {}
/// @brief Constructs pooling primitive with kernel size equal to the spatial dimension of input tensor.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param mode Pooling mode.
pooling(const primitive_id& id,
const primitive_id& input,
pooling_mode mode,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
argmax(""),
mode(static_cast<pooling_mode>(mode)),
global_pooling(true),
pad({0, 0}),
stride({1, 1}),
size({0, 0}),
with_output_size(false),
pad_end(size.size(), 0) {}
output_size(output_size) {}
/// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output).
/// @param id This primitive id.
@@ -184,41 +105,35 @@ struct pooling : public primitive_base<pooling> {
const ov::Shape& size,
const ov::Strides& stride,
const ov::Strides& dilation,
const ov::Shape& pad,
const ov::Shape& pad_end,
const ov::Shape& pads_begin,
const ov::Shape& pads_end,
int64_t axis,
data_types index_element_type,
tensor output_size,
const data_types output_data_type,
const padding& output_padding = padding())
: primitive_base(id, {input, indices_output}, output_padding, optional_data_type{output_data_type}),
argmax(""),
indices_output(indices_output),
mode(pooling_mode::max),
global_pooling(false),
pad(pad),
pads_begin(pads_begin),
pads_end(pads_end),
stride(stride),
dilation(dilation),
size(size),
with_output_size(true),
output_size(output_size),
pad_end(pad_end),
axis(axis),
index_element_type(index_element_type),
maxPoolOpset8Features(true)
{}
maxPoolOpset8Features(true) {}
/// @brief Primitive id which contains indices of each max pooling region.
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
primitive_id argmax;
/// @brief Primitive id which contains indices output.
primitive_id indices_output;
/// @brief Pooling mode.
pooling_mode mode;
/// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
bool global_pooling;
/// @brief Defines logical pad value added to input tensor.
ov::Shape pad;
ov::Shape pads_begin;
/// @brief Defines a shift, relative to the end of padding shape.
ov::Shape pads_end;
/// @brief Defines shift in input buffer between adjacent calculations of output values.
ov::Strides stride;
/// @brief Defines index of next pixel to select when pooling
@@ -229,8 +144,6 @@ struct pooling : public primitive_base<pooling> {
bool with_output_size;
/// @brief User-defined output data size of the primitive (w/o padding).
tensor output_size;
/// @brief Defines a shift, relative to the end of padding shape.
ov::Shape pad_end;
/// @brief first dimension of input that should be used to calculate the upper bound of index output
int64_t axis = 0;
/// @brief type of index output
@@ -240,8 +153,6 @@ struct pooling : public primitive_base<pooling> {
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
if (!argmax.empty())
ret.push_back(argmax);
if (!indices_output.empty())
ret.push_back(indices_output);
return ret;
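With the argmax constructors gone, callers now spell out both padding ends instead of the old single pad plus a separate pad_end member. A minimal sketch of the surviving constructor, mirroring the updated fusion tests later in this diff (names and shapes are illustrative):
ov::Shape kernel{3, 3};
ov::Strides stride{1, 1};
ov::Shape pads_begin{1, 1};
ov::Shape pads_end{1, 1};
topology.add(pooling("pool", "input", pooling_mode::max, kernel, stride, pads_begin, pads_end));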

View File

@@ -111,7 +111,7 @@ void prepare_padding::run(program& p) {
needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(),
prim->output_size,
size,
ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
prim->stride,
ov::Strides(prim->size.size(), 1),
false,

View File

@@ -649,11 +649,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
return data_type_traits::is_i8_u8(in_dt);
};
auto pooling_supports_fusings = [](pooling_node& node) -> bool {
auto pooling_mode = node.get_primitive()->mode;
return pooling_mode != cldnn::pooling_mode::max_with_argmax;
};
auto dts_supports_fusings = [](depth_to_space_node& node) -> bool {
bool input_conv = node.get_dependency(0).is_type<convolution>();
bool out_eltw = node.get_users().front()->is_type<eltwise>();
@@ -772,7 +767,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
should_fuse |= input_data.is_type<gemm>() && gemm_supports_fusings(input_data.as<gemm>());
should_fuse |= input_data.is_type<pooling>() && pooling_supports_fusings(input_data.as<pooling>());
should_fuse |= input_data.is_type<pooling>();
should_fuse |= input_data.is_type<resample>();
@@ -861,8 +856,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
_lo.get_optimization_attributes().use_onednn_impls ||
(in_dt_is_i8_u8 && out_dt_is_i8_u8));
should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt() &&
pooling_supports_fusings(input_data.as<pooling>());
should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt();
should_fuse |= input_data.is_type<fully_connected>() && quantize_node.get_scale_shift_opt();
@@ -960,7 +954,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
(parents[i]->is_type<gather_elements>()) ||
(parents[i]->is_type<scatter_nd_update>()) ||
(parents[i]->is_type<scatter_elements_update>()) ||
(parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
(parents[i]->is_type<pooling>()) ||
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
(parents[i]->is_type<gather>()) ||
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>())) ||

View File

@@ -8,7 +8,6 @@
// ToDo: remove those include with the appropriate code below once we will have support for multiple outputs of a
// primitive
#include "max_unpooling_inst.h"
#include "pooling_inst.h"
#include <vector>
#include <queue>
@@ -27,13 +26,10 @@ void trim_to_outputs::run(program& p) {
queue.push(&p.get_outputs());
std::vector<program_node*> special_nodes;
for (auto& node : p.get_processing_order()) {
if (node->is_type<input_layout>() || // input layout may become disconnected during prior boxes calculations so
// it may have not been marked at this place but we don't want to remove it
node->is_type<max_unpooling>() || // ToDo: remove this after support for multi-outputs in primitives will
// be implemented.
(node->is_type<pooling>() && node->as<pooling>().get_primitive()->mode == pooling_mode::max_with_argmax))
special_nodes.push_back(node);
for (auto& node : p.get_processing_order()) { // input layout may become disconnected during prior boxes calculations so
if (node->is_type<input_layout>()) { // it may have not been marked at this place but we don't want to remove it
special_nodes.push_back(node); // ToDo: remove this after support for multi-outputs in primitives will
} // be implemented.
}
queue.push(&special_nodes);

View File

@@ -1,83 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "max_unpooling_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/graph/network.hpp"
#include "kernel_selector_helper.h"
#include "max_unpooling/max_unpooling_kernel_selector.h"
#include "max_unpooling/max_unpooling_kernel_base.h"
#include <vector>
namespace cldnn {
namespace ocl {
struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
using parent = typed_primitive_impl_ocl<max_unpooling>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<max_unpooling_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<max_unpooling>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
args.inputs.push_back(instance.dep_memory_ptr(1));
return args;
}
public:
event::ptr execute_impl(const std::vector<event::ptr>& events, max_unpooling_inst& instance) override {
// clear output buffer
std::vector<event::ptr> tmp_events(events);
auto& stream = instance.get_network().get_stream();
auto ev = instance.output_memory().fill(stream);
tmp_events.push_back(ev);
return parent::execute_impl(tmp_events, instance);
}
static primitive_impl* create(const max_unpooling_node& arg, const kernel_impl_params& impl_param) {
auto max_unpooling_params = get_default_params<kernel_selector::max_unpooling_params>(impl_param);
auto max_unpooling_optional_params =
get_default_optional_params<kernel_selector::max_unpooling_optional_params>(arg.get_program());
const auto max_idx = 1;
max_unpooling_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[max_idx]));
auto& kernel_selector = kernel_selector::max_unpooling_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(max_unpooling_params, max_unpooling_optional_params);
CLDNN_ERROR_BOOL(arg.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);
return max_unpool;
}
};
namespace detail {
attach_max_unpooling_impl::attach_max_unpooling_impl() {
implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@@ -21,11 +21,9 @@ void validate_args(const pooling_node& arg) {
auto stride_rank = arg.get_primitive()->stride.size();
auto window_rank = arg.get_primitive()->size.size();
if (!arg.get_primitive()->global_pooling) {
CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_rank, "output dimensions", output_rank, "");
CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_rank, "output dimensions", output_rank, "");
CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_rank, "output dimensions", output_rank, "");
}
}
kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
@@ -36,8 +34,6 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
return kernel_selector::pool_type::AVG;
case pooling_mode::average_no_padding:
return kernel_selector::pool_type::AVG;
case pooling_mode::max_with_argmax:
return kernel_selector::pool_type::MAX_WITH_ARGMAX;
default:
assert(0);
return kernel_selector::pool_type::MAX;
@@ -47,8 +43,6 @@ kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode) {
kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode) {
switch (mode) {
case pooling_mode::max:
case pooling_mode::max_with_argmax:
return kernel_selector::kernel_divider_mode::DONT_CARE;
case pooling_mode::average:
return kernel_selector::kernel_divider_mode::FIXED;
case pooling_mode::average_no_padding:
@@ -71,8 +65,6 @@ struct pooling_impl : typed_primitive_impl_ocl<pooling> {
protected:
kernel_arguments_data get_arguments(typed_primitive_inst<pooling>& instance, int32_t split) const override {
kernel_arguments_data args = parent::get_arguments(instance, split);
if (!instance.argument->argmax.empty())
args.inputs.push_back(instance.dep_memory_ptr(1));
return args;
}
@@ -102,7 +94,9 @@ public:
}
const auto& stride = primitive->stride;
const auto& pad = primitive->pad;
const auto& pads_begin = primitive->pads_begin;
const auto& pads_end = primitive->pads_end;
const auto& dilation = primitive->dilation;
auto kernel = primitive->size;
const auto& input_layout = impl_param.input_layouts[0];
@@ -114,19 +108,12 @@ public:
pp.poolType = cldnn_2_pool_type(primitive->mode);
pp.remainderAction = kernel_selector::pool_remainder::CEIL;
if (primitive->global_pooling) {
kernel = ov::Shape(spatial_rank, 1);
for (size_t i = 0; i < spatial_rank; i++) {
kernel[i] = input_layout.spatial(spatial_rank - i - 1);
}
}
// check if last pooling window goes outside of input size + padding. If so the avg pooling size will be
// adjusted to that, to work properly this calculation must take pad_end into account.
auto dynamic_mode = false;
for (size_t i = 0; i < spatial_rank; i++) {
dynamic_mode |= (((output_layout.spatial(i) - 1) * stride[spatial_rank - i - 1]) + primitive->size[spatial_rank - i - 1]) >
(primitive->pad_end[spatial_rank - i - 1] + pad[spatial_rank - i - 1]) + input_layout.spatial(i);
(pads_end[spatial_rank - i - 1] + pads_begin[spatial_rank - i - 1]) + input_layout.spatial(i);
}
if (primitive->mode == pooling_mode::average && dynamic_mode)
@@ -134,17 +121,14 @@ public:
else
pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode);
if (primitive->mode == pooling_mode::max_with_argmax)
pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout()));
uint32_t kernel_z = kernel.size() >= 3 ? kernel[kernel.size() - 3] : 1;
uint32_t kernel_y = kernel.size() >= 2 ? kernel[kernel.size() - 2] : 1;
uint32_t kernel_x = kernel.size() >= 1 ? kernel[kernel.size() - 1] : 1;
pp.poolSize = {kernel_x, kernel_y, kernel_z};
uint32_t pad_z = std::max<std::ptrdiff_t>(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0);
uint32_t pad_y = std::max<std::ptrdiff_t>(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0);
uint32_t pad_x = std::max<std::ptrdiff_t>(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0);
uint32_t pad_z = std::max<std::ptrdiff_t>(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0);
uint32_t pad_y = std::max<std::ptrdiff_t>(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0);
uint32_t pad_x = std::max<std::ptrdiff_t>(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0);
pp.poolPad = {pad_x, pad_y, pad_z};
uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
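A quick numeric check of the overhang predicate above (illustrative values): with input X = 5, kernel 2, stride 2, pads_begin = pads_end = 0 and ceil rounding, the output spatial size is 3, and (3 - 1) * 2 + 2 = 6 exceeds 0 + 0 + 5, so the last window hangs past the input and the average divider must be adjusted.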

View File

@@ -46,7 +46,6 @@ void register_implementations() {
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);

View File

@@ -43,7 +43,6 @@
#include "intel_gpu/primitives/lstm_dynamic.hpp"
#include "intel_gpu/primitives/lstm_dynamic_input.hpp"
#include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp"
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/mvn.hpp"
#include "intel_gpu/primitives/non_max_suppression.hpp"
@@ -127,7 +126,6 @@ REGISTER_OCL(grid_sample);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);

View File

@@ -32,17 +32,12 @@ protected:
dnnl::memory::dims stride(prim->stride.begin(), prim->stride.end());
dnnl::memory::dims kernel(prim->size.begin(), prim->size.end());
dnnl::memory::dims pad_l(prim->pad.begin(), prim->pad.end());
dnnl::memory::dims pad_r(prim->pad_end.begin(), prim->pad_end.end());
dnnl::memory::dims pad_l(prim->pads_begin.begin(), prim->pads_begin.end());
dnnl::memory::dims pad_r(prim->pads_end.begin(), prim->pads_end.end());
auto input_md = onednn::layout_to_memory_desc(input_layout);
auto output_md = onednn::layout_to_memory_desc(output_layout);
if (prim->global_pooling) {
for (size_t i = 0; i < kernel.size(); i++)
kernel[i] = input_md.dims()[2 + i];
}
for (size_t i = 0; i < kernel.size(); i++) {
pad_r[i] = (output_md.dims()[2 + i] - 1) * stride[i] - input_md.dims()[2 + i] + kernel[i] - pad_l[i];
}
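A numeric illustration of the pad_r formula above (assumed values): input 224, kernel 3, stride 2, pad_l 1 and output 112 give pad_r = (112 - 1) * 2 - 224 + 3 - 1 = 0, i.e. oneDNN is handed exactly the right-side padding that reproduces the framework-computed output size.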

View File

@@ -1,40 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "primitive_inst.h"
#include <string>
#include <memory>
namespace cldnn {
template <>
struct typed_program_node<max_unpooling> : public typed_program_node_base<max_unpooling> {
using parent = typed_program_node_base<max_unpooling>;
typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog);
public:
using parent::parent;
program_node& input() const { return get_dependency(0); }
program_node& argmax() const { return get_dependency(1); }
};
using max_unpooling_node = typed_program_node<max_unpooling>;
template <>
class typed_primitive_inst<max_unpooling> : public typed_primitive_inst_base<max_unpooling> {
using parent = typed_primitive_inst_base<max_unpooling>;
public:
typed_primitive_inst(network& network, max_unpooling_node const& desc);
static layout calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(max_unpooling_node const& node);
};
using max_unpooling_inst = typed_primitive_inst<max_unpooling>;
} // namespace cldnn

View File

@@ -1,112 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "max_unpooling_inst.h"
#include "primitive_type_base.h"
#include "sliding_window_utils_legacy.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include <memory>
namespace cldnn {
primitive_type_id max_unpooling::type_id() {
static primitive_type_base<max_unpooling> instance;
return &instance;
}
max_unpooling_node::typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog)
: parent(prim, prog) {
can_share_buffer(false); // for max_unpooling initial zero values are significant
}
layout max_unpooling_inst::calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
"Output data type forcing is not supported for max_unpooling_node!");
auto desc = impl_param.typed_desc<max_unpooling>();
auto input_layout = impl_param.get_input_layout(0);
auto argmax_layout = impl_param.get_input_layout(1);
CLDNN_ERROR_NOT_EQUAL(desc->id,
"Argmax data type",
static_cast<size_t>(argmax_layout.data_type),
"expected to be fp32",
static_cast<size_t>(data_types::f32),
"Argmax data type is not fp32.");
if (desc->with_output_size) {
tensor output_size(input_layout.batch(),
input_layout.feature(),
desc->output_size.spatial[0],
desc->output_size.spatial[1]);
return {input_layout.data_type, input_layout.format, output_size};
}
auto pad = desc->pad;
auto stride = desc->stride;
auto window_size = desc->size;
CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
"stride spatial X",
stride.spatial[0],
"",
0,
"Stride spatial X must be positive (>= 1)");
CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
"stride spatial Y",
stride.spatial[1],
"",
0,
"Stride spatial Y must be positive (>= 1)");
CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
"window size spatial X",
window_size.spatial[0],
"",
0,
"Size X (of pooling window) must be positive (>= 1)");
CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
"window size spatial Y",
window_size.spatial[1],
"",
0,
"Size Y (of pooling window) must be positive (>= 1)");
auto output_range = calc_sliding_window_needed_input_range(input_layout.get_tensor(),
window_size,
pad,
stride,
{1, 1, 1, 1},
true,
1);
tensor output_size(input_layout.batch(),
input_layout.feature(),
output_range.spatial[0],
output_range.spatial[1]);
return {input_layout.data_type, input_layout.format, output_size};
}
std::string max_unpooling_inst::to_string(max_unpooling_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();
auto& input = node.input();
auto& argmax = node.argmax();
std::stringstream primitive_description;
json_composite max_unmax_unpooling_info;
max_unmax_unpooling_info.add("input", input.id());
max_unmax_unpooling_info.add("argmax", argmax.id());
node_info->add("max unmax_unpooling info", max_unmax_unpooling_info);
node_info->dump(primitive_description);
return primitive_description.str();
}
max_unpooling_inst::typed_primitive_inst(network& network, max_unpooling_node const& node)
: parent(network, node) {}
} // namespace cldnn
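The output-size arithmetic in calc_output_layout above can be sanity-checked against the max_unpooling unit test at the bottom of this diff: with a 2x2 window, stride 1 and zero pad on a 2x1 spatial input, the needed input range works out to (2 - 1) * 1 + 2 = 3 in X and (1 - 1) * 1 + 2 = 2 in Y, matching the expected 2x2x3x2 output layout.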

View File

@@ -22,7 +22,7 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i
auto input_layout = impl_param.get_input_layout();
auto pad = desc->pad;
auto pad = desc->pads_begin;
auto stride = desc->stride;
auto window_size = desc->size;
@@ -46,43 +46,6 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i
}
}
if (!desc->argmax.empty())
CLDNN_ERROR_NOT_EQUAL(desc->id,
"Pooling mode",
static_cast<size_t>(desc->mode),
"should be max_with_argmax",
static_cast<size_t>(pooling_mode::max_with_argmax),
"Pooling mode should be set to max_with_argmax when argmax primitive is present.");
if (desc->mode == pooling_mode::max_with_argmax) {
CLDNN_ERROR_NOT_EQUAL(desc->id,
"Argmax primitive",
static_cast<size_t>(desc->argmax.empty()),
"should not be empty",
static_cast<size_t>(0),
"Argmax primitive not present despite max_with_argmax mode.");
auto argmax_layout = impl_param.get_input_layout(1);
CLDNN_ERROR_NOT_EQUAL(desc->id,
"Argmax data type",
static_cast<size_t>(argmax_layout.data_type),
"expected to be fp32",
static_cast<size_t>(data_types::f32),
"Argmax data type is not fp32.");
CLDNN_ERROR_NOT_PROPER_FORMAT(desc->id,
"Input_layout.format",
input_layout.format.value,
"argmax_layout.format",
argmax_layout.format);
}
if (desc->global_pooling) {
window_size = ov::Shape(input_layout.get_spatial_rank(), 1);
for (size_t i = 0; i < input_layout.get_spatial_rank(); i++) {
window_size[i] = input_layout.spatial(input_layout.get_spatial_rank() - i - 1);
}
}
uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
uint32_t stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1;
uint32_t stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1;
@@ -190,13 +153,10 @@ std::string pooling_inst::to_string(pooling_node const& node) {
std::stringstream primitive_description;
bool is_global = desc->global_pooling;
json_composite pooling_info;
pooling_info.add("mode", mode);
pooling_info.add("stride", cldnn::to_string(strd));
pooling_info.add("kernel size", cldnn::to_string(kernel_size));
pooling_info.add("is global", is_global ? "true" : "false");
if (desc->with_output_size) {
json_composite ud_out_size_info;
ud_out_size_info.add("size", desc->output_size.to_string());

View File

@@ -327,7 +327,7 @@ bool program::analyze_output_size_handling_need() {
auto calc_output_range = calc_sliding_window_output_range<swor_mode::exceed_once_data>(
primInputSize,
size,
ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
prim->stride,
ov::Strides(prim->stride.size(), 1),
true,

View File

@@ -1,44 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"
KERNEL(pooling_gpu)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global float* arg_max)
{
#if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF
const uint x = (uint)get_global_id(0);
const uint y = (uint)get_global_id(1);
const uint bf = (uint)get_global_id(2);
const uint f = bf % INPUT0_FEATURE_NUM;
const uint b = bf / INPUT0_FEATURE_NUM;
if (x >= INPUT0_SIZE_X)
{
return;
}
#elif OUTPUT_LAYOUT_YXFB
const uint x = (uint)get_global_id(1);
const uint y = (uint)get_global_id(2);
const uint bf = (uint)get_global_id(0);
const uint f = bf / INPUT0_BATCH_NUM;
const uint b = bf % INPUT0_BATCH_NUM;
#endif
const uint input_id = GET_DATA_INDEX(INPUT0, b, f, y, x);
const uint arg_max_id = GET_DATA_INDEX(INPUT1, b, f, y, x);
const uint pool_idx = convert_uint(arg_max[arg_max_id]);
#if OUTPUT_PADDED
const uint x_output = pool_idx % OUTPUT_SIZE_X;
const uint y_output = (pool_idx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const uint f_output = (pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
const uint b_output = pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM;
const uint output_pos = GET_DATA_INDEX(OUTPUT, b_output, f_output, y_output, x_output);
output[output_pos] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#else
output[pool_idx] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#endif
}
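The kernel above scatters each pooled value to the flattened position recorded in arg_max, which is why the implementation zero-fills the output first and disables buffer sharing (the initial zeros are significant). A scalar sketch of the same scatter, with ACTIVATION and padded layouts omitted (names are illustrative):
#include <algorithm>
#include <cstddef>
#include <vector>

void max_unpool_ref(const std::vector<float>& input,
                    const std::vector<float>& arg_max,  // fp32, flattened bfyx indices
                    std::vector<float>& output) {
    // Positions not selected by the forward max pooling must stay zero.
    std::fill(output.begin(), output.end(), 0.0f);
    for (std::size_t i = 0; i < input.size(); ++i)
        output[static_cast<std::size_t>(arg_max[i])] += input[i];
}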

View File

@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
#define INIT_VAL ACCUMULATOR_VAL_MIN
#elif defined AVG_POOLING
#define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@
inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
const __global INPUT0_TYPE* input,
__global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
, __global float* arg_max
#endif
#if HAS_FUSED_OPS_DECLS
, FUSED_OPS_DECLS
#endif
@@ -49,12 +46,6 @@ KERNEL(pooling_gpu)(
ACCUMULATOR_TYPE max_x[BLOCK_SIZE_Y];
ACCUMULATOR_TYPE result[POOL_SIZE_Y];
#if MAX_WITH_ARGMAX_POOLING
uint arg_max_x[BLOCK_SIZE_Y] = { 0 };
uint arg_max_result[POOL_SIZE_Y] = { 0 };
uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#endif
for(uint i = 0; i < BLOCK_SIZE_Y; i++)
{
max_x[i] = INIT_VAL;
@@ -65,33 +56,15 @@
{
for(uint i = 0; i < POOL_SIZE_X; i++)
{
#if MAX_WITH_ARGMAX_POOLING
if(input[input_idx] > max_x[j])
arg_max_x[j] = input_idx_bfyx_no_padding;
#endif
max_x[j] = FUNC_CALL(apply_pooling)(max_x[j], TO_ACCUMULATOR_TYPE(input[input_idx]));
input_idx += INPUT0_X_PITCH;
#if MAX_WITH_ARGMAX_POOLING
input_idx_bfyx_no_padding++;
#endif
}
input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);
#if MAX_WITH_ARGMAX_POOLING
input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
}
for(uint i = 0; i < POOL_SIZE_Y; i++)
{
result[i] = max_x[i * STRIDE_SIZE_Y];
#if MAX_WITH_ARGMAX_POOLING
arg_max_result[i] = arg_max_x[i * STRIDE_SIZE_Y];
#endif
}
// now we do max in "y" dimension
@@ -100,10 +73,6 @@
for(uint j = 1; j < POOL_SIZE_Y; j++)
{
#if MAX_WITH_ARGMAX_POOLING
if(max_x[j + i * STRIDE_SIZE_Y] > result[i])
arg_max_result[i] = arg_max_x[j + i * STRIDE_SIZE_Y];
#endif
result[i] = FUNC_CALL(apply_pooling)(result[i], max_x[j + i * STRIDE_SIZE_Y]);
}
@@ -111,10 +80,6 @@
uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x);
#if MAX_WITH_ARGMAX_POOLING
uint arg_max_pos = GET_DATA_INDEX(INPUT1, b, f, y, x);
#endif
OUTPUT_TYPE final_result;
ACTIVATION_TYPE pool_result;
@@ -134,10 +99,6 @@
#endif
output[output_pos] = final_result;
output_pos += OUTPUT_Y_PITCH;
#if MAX_WITH_ARGMAX_POOLING
arg_max[arg_max_pos] = arg_max_result[i];
arg_max_pos += INPUT1_Y_PITCH;
#endif
}
}
}

View File

@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
#define INIT_VAL ACCUMULATOR_VAL_MIN
#elif AVG_POOLING
#define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@
inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
const __global INPUT0_TYPE* input,
__global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
, __global float* arg_max
#endif
#ifdef SELECTED_INDICES_TYPE
, __global SELECTED_INDICES_TYPE* indices
#endif
@@ -98,10 +95,6 @@ KERNEL(pooling_gpu)(
uint result_idx = 0;
#endif
#if MAX_WITH_ARGMAX_POOLING
uint arg_max_idx = 0;
#endif
#ifdef CHECK_BOUNDARY
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
@@ -163,19 +156,6 @@
const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
#endif
#endif
#if MAX_WITH_ARGMAX_POOLING
if(input[input_idx] > result)
{
#if OUTPUT_DIMS < 5
const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y *
(input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b)));
#endif
arg_max_idx = input_idx_bfyx_no_padding;
}
#endif
const ACCUMULATOR_TYPE casted_input = TO_ACCUMULATOR_TYPE(input[input_idx]);
#ifdef SELECTED_INDICES_TYPE
if (casted_input > result)
@@ -220,14 +200,6 @@ KERNEL(pooling_gpu)(
uint input_idx = INPUT0_GET_INDEX(b, f, offset_y, offset_x);
#endif
#if MAX_WITH_ARGMAX_POOLING
#if OUTPUT_DIMS < 5
uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (offset_z + INPUT0_SIZE_Z *(f + INPUT0_FEATURE_NUM * b)));
#endif
#endif
#if OUTPUT_DIMS == 5
for(uint l = 0; l < POOL_SIZE_Z; l++)
{
@@ -236,11 +208,6 @@
{
for(uint i = 0; i < POOL_SIZE_X; i++)
{
#if MAX_WITH_ARGMAX_POOLING
if(input[input_idx] > result)
arg_max_idx = input_idx_bfyx_no_padding;
#endif
#if OUTPUT_DIMS == 5
#if !INPUT0_SIMPLE
uint input_idx = INPUT0_GET_INDEX(b, f, offset_z + l, offset_y + j, offset_x + i);
@@ -267,21 +234,11 @@
input_idx += INPUT0_X_PITCH;
#endif
#endif
#if MAX_WITH_ARGMAX_POOLING
input_idx_bfyx_no_padding++;
#endif
}
input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);
#if MAX_WITH_ARGMAX_POOLING
input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
}
#if OUTPUT_DIMS == 5 // 3D
input_idx += (INPUT0_Z_PITCH - POOL_SIZE_Y*INPUT0_Y_PITCH);
#if MAX_WITH_ARGMAX_POOLING
input_idx_bfyx_no_padding += (INPUT0_SIZE_Y - POOL_SIZE_Y);
#endif
}
#endif
@@ -322,12 +279,6 @@ KERNEL(pooling_gpu)(
#endif
indices[output_pos] = TO_SELECTED_INDICES_TYPE(result_idx);
#endif
#if MAX_WITH_ARGMAX_POOLING
//INPUT1 macro stands for Argmax
const uint arg_max_pos = GET_DATA_INDEX_5D(INPUT1, b, f, z, y, x);
arg_max[arg_max_pos] = convert_float(arg_max_idx);
#endif
}
#undef INIT_VAL

View File

@@ -38,7 +38,6 @@ enum class KernelType {
RESAMPLE,
REGION_YOLO,
REORG_YOLO,
MAX_UNPOOLING,
MVN,
LSTM_GEMM,
LSTM_ELT,
@@ -184,7 +183,6 @@ enum class ActivationFunction {
enum class PoolType {
MAX,
AVG,
MAX_WITH_ARGMAX,
BILINEAR,
DEFORMABLE_BILINEAR
};

View File

@@ -242,7 +242,6 @@ std::string toString(PoolType mode) {
switch (mode) {
case PoolType::MAX: return "MAX";
case PoolType::AVG: return "AVG";
case PoolType::MAX_WITH_ARGMAX: return "MAX_WITH_ARGMAX";
case PoolType::BILINEAR: return "BILINEAR";
case PoolType::DEFORMABLE_BILINEAR: return "DEFORMABLE_BILINEAR";
default: return "";

View File

@@ -213,9 +213,6 @@ void ParamsKey::EnablePoolType(PoolType t) {
case PoolType::AVG:
key.restrict.val.dedicated.pooling.avg = 1;
break;
case PoolType::MAX_WITH_ARGMAX:
key.restrict.val.dedicated.pooling.max_with_argmax = 1;
break;
case PoolType::BILINEAR:
key.restrict.val.dedicated.pooling.bilinear = 1;
break;

View File

@@ -106,7 +106,6 @@ public:
uint32_t max : 1;
uint32_t avg : 1;
uint32_t floor : 1;
uint32_t max_with_argmax : 1;
uint32_t ceil : 1;
uint32_t bilinear : 1;
uint32_t deformable_bilinear : 1;

View File

@@ -1,77 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "max_unpooling_kernel_base.h"
#include <algorithm>
namespace kernel_selector {
bool MaxUnpoolingKernelBase::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::MAX_UNPOOLING || o.GetType() != KernelType::MAX_UNPOOLING) {
return false;
}
return true;
}
JitConstants MaxUnpoolingKernelBase::GetJitConstants(const max_unpooling_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
if (params.outputs[0].PitchesDifferFromLogicalDims())
jit.AddConstant(MakeJitConstant("OUTPUT_PADDED", 1));
return jit;
}
MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const {
const auto& input = params.inputs[0];
DispatchData dispatchData;
if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
// Determine global work sizes.
dispatchData.gws[2] = input.Batch().v * input.Feature().v; // B, F
dispatchData.gws[0] = Align(input.X().v, 32); // X
dispatchData.gws[1] = input.Y().v; // Y
dispatchData.lws[0] = 32;
dispatchData.lws[1] = 1;
dispatchData.lws[2] = 1;
} else {
// Determine global work sizes.
dispatchData.gws[0] = input.Batch().v * input.Feature().v; // B, F
dispatchData.gws[1] = input.X().v; // X
dispatchData.gws[2] = input.Y().v; // Y
dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
--dispatchData.lws[0];
}
dispatchData.lws[1] = 1;
dispatchData.lws[2] = 1;
}
return dispatchData;
}
KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
const max_unpooling_params& orgParams = static_cast<const max_unpooling_params&>(params);
DispatchData dispatchData = SetDefault(orgParams);
KernelData kd = KernelData::Default<max_unpooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
return {kd};
}
} // namespace kernel_selector
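The local-size search in SetDefault above simply picks the largest divisor of gws[0] that does not exceed 32; for example, gws[0] = 48 starts at 32 and decrements to 24 (48 % 24 == 0), so the work-group size always divides the global range without padding.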

View File

@@ -1,43 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_params : public base_params {
max_unpooling_params() : base_params(KernelType::MAX_UNPOOLING) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_optional_params : optional_params {
max_unpooling_optional_params() : optional_params(KernelType::MAX_UNPOOLING) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MaxUnpoolingKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class MaxUnpoolingKernelBase : public KernelBaseOpenCL {
public:
using KernelBaseOpenCL::KernelBaseOpenCL;
virtual ~MaxUnpoolingKernelBase() {}
struct DispatchData : public CommonDispatchData {
bool needsBoundary = false;
};
protected:
bool Validate(const Params&, const optional_params&) const override;
virtual JitConstants GetJitConstants(const max_unpooling_params& params) const;
virtual DispatchData SetDefault(const max_unpooling_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
};
} // namespace kernel_selector

View File

@@ -1,36 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "max_unpooling_kernel_gpu_ref.h"
namespace kernel_selector {
ParamsKey MaxUnpoolingKernelGPURef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableDifferentTypes();
return k;
}
KernelsData MaxUnpoolingKernelGPURef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
KernelsPriority MaxUnpoolingKernelGPURef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
return FORCE_PRIORITY_9;
}
} // namespace kernel_selector

View File

@@ -1,19 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "max_unpooling_kernel_base.h"
namespace kernel_selector {
class MaxUnpoolingKernelGPURef : public MaxUnpoolingKernelBase {
public:
MaxUnpoolingKernelGPURef() : MaxUnpoolingKernelBase("max_unpooling_gpu_ref") {}
virtual ~MaxUnpoolingKernelGPURef() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector

View File

@@ -1,15 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "max_unpooling_kernel_selector.h"
#include "max_unpooling_kernel_gpu_ref.h"
namespace kernel_selector {
max_unpooling_kernel_selector::max_unpooling_kernel_selector() { Attach<MaxUnpoolingKernelGPURef>(); }
KernelsData max_unpooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::MAX_UNPOOLING);
}
} // namespace kernel_selector

View File

@@ -1,23 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class max_unpooling_kernel_selector : public kernel_selector_base {
public:
static max_unpooling_kernel_selector& Instance() {
static max_unpooling_kernel_selector instance_;
return instance_;
}
max_unpooling_kernel_selector();
virtual ~max_unpooling_kernel_selector() {}
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@@ -203,8 +203,6 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
GetFusedPrimitiveInputsCount(params));
uint32_t param_idx = 1;
if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
if (orgParams.maxPoolOpset8Features) {
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});

View File

@@ -20,7 +20,6 @@ ParamsKey PoolingKernel_b_fs_yx_fsv16::GetSupportedKey() const {
k.EnableBatching();
k.EnablePoolType(PoolType::MAX);
k.EnablePoolType(PoolType::AVG);
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
k.EnablePoolRemainder(PoolRemainder::FLOOR);
k.EnablePoolRemainder(PoolRemainder::CEIL);
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);

View File

@@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPUBfyxBlockOpt::GetSupportedKey() const {
k.EnableBatching();
k.EnablePoolType(PoolType::MAX);
k.EnablePoolType(PoolType::AVG);
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
k.EnablePoolRemainder(PoolRemainder::FLOOR);
k.EnablePoolRemainder(PoolRemainder::CEIL);
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);

View File

@@ -90,8 +90,8 @@ bool PoolingKernelGPUInt8Ref::Validate(const Params& params, const optional_para
if (p.inputs[0].GetDType() == Datatype::INT8 || p.inputs[0].GetDType() == Datatype::UINT8) {
// Max pooling doesn't change quantization ranges, so output data type should be the same as input
if ((p.poolType == PoolType::MAX || p.poolType == PoolType::MAX_WITH_ARGMAX)
&& (p.outputs[0].GetDType() != p.inputs[0].GetDType()) && p.quantization == QuantizationType::NONE)
if (p.poolType == PoolType::MAX && p.outputs[0].GetDType() != p.inputs[0].GetDType() &&
p.quantization == QuantizationType::NONE)
return false;
// Average pooling should produce FP by default. (u)int8 is possible when quantize op is fused.
// if (p.poolType == PoolType::AVG &&

View File

@@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const {
k.EnableBatching();
k.EnablePoolType(PoolType::MAX);
k.EnablePoolType(PoolType::AVG);
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
k.EnablePoolRemainder(PoolRemainder::FLOOR);
k.EnablePoolRemainder(PoolRemainder::CEIL);
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);

View File

@@ -36,9 +36,9 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Av
kernel,
strides,
pads_begin,
pads_end,
tensor_from_dims(op->get_output_shape(0)),
cldnn::element_type_to_data_type(op->get_output_element_type(0)));
poolPrim.pad_end = pads_end;
p.add_primitive(*op, poolPrim);
}
@@ -64,9 +64,9 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
kernel,
strides,
pads_begin,
pads_end,
tensor_from_dims(op->get_output_shape(0)),
cldnn::element_type_to_data_type(op->get_output_element_type(0)));
poolPrim.pad_end = pads_end;
p.add_primitive(*op, poolPrim);
}
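With the extra constructor argument, pads_end now travels through the cldnn::pooling constructor (see the primitive header earlier in this diff) instead of being patched onto poolPrim after construction, so both Create*PoolOp paths build a fully initialized primitive in one expression.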

View File

@@ -148,11 +148,12 @@ TEST_P(pooling_f32_activation, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 3);
ov::Strides stride(r, 1);
ov::Shape pad(r, 1);
ov::Shape pads_begin(r, 1);
ov::Shape pads_end(r, 1);
create_topologies(
input_layout("input", get_input_layout(p)),
pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
activation("act", "pooling", activation_func::relu),
reorder("output_reorder", "act", format::bfyx, data_types::f32)
);
@@ -185,12 +186,13 @@ TEST_P(pooling_f32_scale, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 3);
ov::Strides stride(r, 1);
ov::Shape pad(r, 1);
ov::Shape pads_begin(r, 1);
ov::Shape pads_end(r, 1);
create_topologies(
input_layout("input", get_input_layout(p)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
reorder("output_reorder", "scale", format::bfyx, data_types::f32)
);
@@ -205,12 +207,13 @@ TEST_P(pooling_f32_scale, fp16_scale_out) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 3);
ov::Strides stride(r, 1);
ov::Shape pad(r, 1);
ov::Shape pads_begin(r, 1);
ov::Shape pads_end(r, 1);
create_topologies(
input_layout("input", get_input_layout(p)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, data_types::f16),
reorder("output_reorder", "scale", format::bfyx, data_types::f32)
);
@@ -241,7 +244,8 @@ TEST_P(pooling_scale_activation_quantize, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 4);
ov::Strides stride(r, 2);
ov::Shape pad(r, 0);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
@@ -250,7 +254,7 @@ TEST_P(pooling_scale_activation_quantize, basic) {
data("out_lo", get_mem(get_single_element_layout(p), 0)),
data("out_hi", get_mem(get_single_element_layout(p), 255)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
activation("activation", "scale", activation_func::relu),
quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@@ -267,7 +271,8 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 4);
ov::Strides stride(r, 2);
ov::Shape pad(r, 0);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
@@ -276,7 +281,7 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
data("out_lo", get_mem(get_single_element_layout(p), -127, 127)),
data("out_hi", get_mem(get_single_element_layout(p), -127, 127)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
activation("activation", "scale", activation_func::relu),
quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
@@ -293,7 +298,8 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 4);
ov::Strides stride(r, 2);
ov::Shape pad(r, 0);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
@@ -302,7 +308,7 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
data("out_lo", get_mem(get_single_element_layout(p), 0)),
data("out_hi", get_mem(get_single_element_layout(p), 255)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
activation("activation", "scale", activation_func::hyperbolic_tan),
quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@@ -362,12 +368,13 @@ TEST_P(pooling_scale_activation, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 4);
ov::Strides stride(r, 2);
ov::Shape pad(r, 0);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
activation("activation", "scale", activation_func::relu),
reorder("output_reorder", "activation", p.default_format, data_types::f32)
@@ -383,12 +390,13 @@ TEST_P(pooling_scale_activation, eltwise_mul) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 4);
ov::Strides stride(r, 2);
ov::Shape pad(r, 0);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
data("scale_data", get_mem(get_per_channel_layout(p))),
pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
activation("activation", "scale", activation_func::relu),
reorder("output_reorder", "activation", p.default_format, data_types::f32)
@@ -577,11 +585,12 @@ TEST_P(pooling_onednn_activation1, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 3);
ov::Strides stride(r, 1);
ov::Shape pad(r, 1);
ov::Shape pads_begin(r, 1);
ov::Shape pads_end(r, 1);
create_topologies(
input_layout("input", get_input_layout(p)),
pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
activation("act", "pooling", activation_func::relu),
reorder("output_reorder", "act", format::bfyx, data_types::f32)
);
@@ -597,10 +606,12 @@ TEST_P(pooling_onednn_activation2, basic) {
auto r = get_input_layout(p).get_spatial_rank();
ov::Shape kernel(r, 3);
ov::Strides stride(r, 1);
ov::Shape pads_begin(r, 0);
ov::Shape pads_end(r, 0);
create_topologies(
input_layout("input", get_input_layout(p)),
pooling("pooling", "input", p.pool_mode, kernel, stride),
pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
activation("act", "pooling", activation_func::relu),
reorder("output_reorder", "act", format::bfyx, data_types::f32)
);

View File

@@ -1,442 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/max_unpooling.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
#include <intel_gpu/primitives/pooling.hpp>
using namespace cldnn;
using namespace ::tests;
TEST(max_unpooling_gpu, basic_in2x3x2x2) {
// Input : 2x2x2x1
// Argmax : 2x2x2x1
// Output : 2x2x3x2
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Input:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
//
// Output:
// f0: b0: 0 0 0 b1: 0 0 0
// f0: b0: 0 4 0 b1: 0.5 0 0
// f1: b0: 0 0 0 b1: 0 0 0
// f1: b0: 0 8 16 b1: 12 0 17
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
});
set_values(arg_max, {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("arg_max", arg_max));
topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("max_unpooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 2);
EXPECT_EQ(output_layout.spatial(0), 3);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_output_vec = {
0.f, 0.f, 0.f,
0.f, 4.f, 0.f,
0.f, 0.f, 0.f,
0.f, 8.f, 16.f,
0.f, 0.f, 0.f,
0.5f, 0.f, 0.f,
0.f, 0.f, 0.f,
12.f, 0.f, 17.f,
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
}
}
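The fp32 argmax entries above are flat offsets into the bfyx output tensor. A minimal decode sketch for the first entry (my arithmetic, not part of the suite; the output is 2 batches x 2 features x 2 rows x 3 columns):
size_t idx = 4;                  // first argmax entry
size_t x = idx % 3;              // 4 % 3        == 1
size_t y = (idx / 3) % 2;        // (4 / 3) % 2  == 1
size_t f = (idx / 6) % 2;        // (4 / 6) % 2  == 0
size_t b = idx / 12;             //  4 / 12      == 0
// -> the pooled value 4.0f is scattered to (b0, f0, y1, x1),
//    i.e. the middle of the "0 4 0" row in the expected output.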
TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) {
// Input : 2x2x2x1
// Argmax : 2x2x2x1
// Output : 2x2x3x2
// Output Padding : 0x0x1x1
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Input:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
//
// Output:
// f0: b0: 0 0 0 b1: 0 0 0
// f0: b0: 0 4 0 b1: 0.5 0 0
// f1: b0: 0 0 0 b1: 0 0 0
// f1: b0: 0 8 16 b1: 12 0 17
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
});
set_values(arg_max, {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("arg_max", arg_max));
topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("max_unpooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 2);
EXPECT_EQ(output_layout.spatial(0), 3);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_output_vec = {
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 4.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 8.f, 16.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.5f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 12.f, 0.f, 17.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f,
};
std::vector<float> out;
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
out.push_back(output_ptr[i]);
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
}
}
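The expected vector above is the physical buffer: with padding({0, 0, 1, 1}) each logical 3x2 feature map is stored as a 5x4 slice with a one-element zero border. A worked offset (my arithmetic):
size_t padded_pitch = 3 + 1 + 1;                // logical x-size plus both pads
size_t off = (1 + 1) * padded_pitch + (1 + 1);  // logical (y1, x1) -> offset 12
// expected_output_vec[12] is exactly the 4.f in the first feature slice.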
TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) {
// Input : 2x2x2x1
// Argmax : 2x2x2x1
// Output : 2x2x3x2
// Output size explicitly provided
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Input:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
//
// Output:
// f0: b0: 0 0 0 b1: 0 0 0
// f0: b0: 0 4 0 b1: 0.5 0 0
// f1: b0: 0 0 0 b1: 0 0 0
// f1: b0: 0 8 16 b1: 12 0 17
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
});
set_values(arg_max, {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("arg_max", arg_max));
topology.add(max_unpooling("max_unpooling", "input", "arg_max", {2, 2, 3, 2}));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("max_unpooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 2);
EXPECT_EQ(output_layout.spatial(0), 3);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_output_vec = {
0.f, 0.f, 0.f,
0.f, 4.f, 0.f,
0.f, 0.f, 0.f,
0.f, 8.f, 16.f,
0.f, 0.f, 0.f,
0.5f, 0.f, 0.f,
0.f, 0.f, 0.f,
12.f, 0.f, 17.f,
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
}
}
TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) {
// Input : 2x2x2x1
// Argmax : 2x2x2x1
// Output : 2x2x3x2
// Input values in fp16
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Input:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
//
// Output:
// f0: b0: 0 0 0 b1: 0 0 0
// f0: b0: 0 4 0 b1: 0.5 0 0
// f1: b0: 0 0 0 b1: 0 0 0
// f1: b0: 0 8 16 b1: 12 0 17
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
FLOAT16(4.0f), FLOAT16(4.0f),
FLOAT16(8.0f), FLOAT16(16.0f),
FLOAT16(0.5f), FLOAT16(0.0f),
FLOAT16(12.0f), FLOAT16(17.0f)
});
set_values(arg_max, {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("arg_max", arg_max));
topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("max_unpooling").get_memory();
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 2);
EXPECT_EQ(output_layout.spatial(0), 3);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_output_vec = {
0.f, 0.f, 0.f,
0.f, 4.f, 0.f,
0.f, 0.f, 0.f,
0.f, 8.f, 16.f,
0.f, 0.f, 0.f,
0.5f, 0.f, 0.f,
0.f, 0.f, 0.f,
12.f, 0.f, 17.f,
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
}
}
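Exact EXPECT_EQ after the fp16 round trip is safe here because every value in this test (0.5f, 4.f, ..., 17.f) is exactly representable in half precision; this is standard fp16 behaviour rather than anything the test asserts:
// fp16's 10-bit mantissa stores every integer up to 2048 (and every half in
// this range) exactly, so the f32 -> f16 -> f32 round trip is bit-preserving:
float v = 17.0f;
// float16_to_float32(FLOAT16(v)) == v for each value used above.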
TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) {
// Input : 2x2x3x2
// Argmax : 2x2x2x1
// Output : 2x2x3x2 (the unpooling result; the intermediate pooling output is 2x2x2x1)
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Forward Max Pooling Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
//
// Unpooling output:
// f0: b0: 0 0 0 b1: 0 0 0
// f0: b0: 0 4 0 b1: 0.5 0 0
// f1: b0: 0 0 0 b1: 0 0 0
// f1: b0: 0 8 16 b1: 12 0 17
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling_max_with_argmax", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));
topology.add(max_unpooling("max_unpooling", "pooling_max_with_argmax", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("max_unpooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 2);
EXPECT_EQ(output_layout.spatial(0), 3);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
0.f, 0.f, 0.f,
0.f, 4.f, 0.f,
0.f, 0.f, 0.f,
0.f, 8.f, 16.f,
0.f, 0.f, 0.f,
0.5f, 0.f, 0.f,
0.f, 0.f, 0.f,
12.f, 0.f, 17.f,
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
}
for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}

View File

@ -125,18 +125,12 @@ VVVF<typename pooling_mode_output<InputT, Mode>::type> reference_pooling(const V
int stride_z,
int offset_x,
int offset_y,
-int offset_z,
-bool global_pooling) {
+int offset_z) {
using output_t = typename pooling_mode_output<InputT, Mode>::type;
VVVF<output_t> result;
auto size_x = input[0][0].size();
auto size_y = input[0].size();
auto size_z = input.size();
-if (global_pooling) {
-pool_z = size_z;
-pool_y = size_y;
-pool_x = size_x;
-}
auto accumulator = pooling_accumulator<InputT, Mode>();
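reference_pooling no longer takes a global_pooling flag; presumably (an assumption on my part, the diff itself does not say) a global pool is now expressed by passing a window that spans the whole input:
// Hedged sketch: derive the "global" window from the sizes computed above,
// then call reference_pooling with stride 1 and zero offsets.
auto pool_x_global = input[0][0].size();    // window covers all of x
auto pool_y_global = input[0].size();       // ... all of y
auto pool_z_global = input.size();          // ... all of z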
@ -279,141 +273,6 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
EXPECT_EQ(2.0f, output_ptr[0]);
}
TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
// Brief test description.
//
// Pool mode: max
// Global pooling: true
// Padding: none
//
// Input data:
// [-0.5, 1.0, 0.5]
// [ 2.0, 1.5, -0.5]
// [ 0.0, -1.0, 0.5]
//
// Expected output:
// [ 2.0]
auto& engine = get_test_engine();
auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 3, 3 } });
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));
network network(engine, topology);
set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
network.set_input_data("input_prim", input_prim);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "pool_prim");
auto output_prim = outputs.begin()->second.get_memory();
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
EXPECT_EQ(2.0f, output_ptr[0]);
}
TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
// Brief test description.
//
// Pool mode: max
// Global pooling: true
// Padding: none
auto& engine = get_test_engine();
auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));
network network(engine, topology);
std::vector<char> vals = {
0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
-9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
-1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
-2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
};
set_values(input_prim, vals);
network.set_input_data("input_prim", input_prim);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "pool_prim");
auto output_prim = outputs.begin()->second.get_memory();
cldnn::mem_lock<char> output_ptr(output_prim, get_test_stream());
std::vector<char> answers = { 8, 44, 8, 81, 64, 8, 12, 66, 14, 8, 99, 64, 8, 11, 18, 21 };
ASSERT_EQ(answers.size(), output_ptr.size());
for (size_t i = 0; i < output_ptr.size(); ++i) {
EXPECT_EQ(answers[i], output_ptr[i]);
}
}
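A worked check of the first expected value (my arithmetic): in b_fs_yx_fsv16 the 16 features of one (y, x) location are stored contiguously, so feature 0 is sampled every 16 elements of vals:
char m = vals[0];                            // feature 0 at the first location
for (size_t i = 16; i < vals.size(); i += 16)
    m = std::max(m, vals[i]);                // samples {0, -9, 8, -1, 6, 6, 3, 6, -2}
// m == 8 == answers[0]; std::max needs <algorithm>.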
TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
// Brief test description.
//
// Pool mode: avg
// Global pooling: true
// Padding: none
auto& engine = get_test_engine();
auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::average));
network network(engine, topology);
std::vector<char> vals = {
0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
-9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
-1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
-2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
};
set_values(input_prim, vals);
network.set_input_data("input_prim", input_prim);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "pool_prim");
auto output_prim = outputs.begin()->second.get_memory();
cldnn::mem_lock<uint8_t> output_ptr(output_prim, get_test_stream());
std::vector<uint8_t> answers = {
29, 199, 241, 63, 85, 85, 213, 64, 85, 85, 21, 64, 142, 227, 8, 65,
57, 142, 19, 65, 171, 170, 170, 62, 57, 142, 35, 64, 0, 0, 32, 65,
199, 113, 28, 64, 29, 199, 241, 63, 29, 199, 153, 65, 57, 142, 83, 65,
228, 56, 14, 63, 142, 227, 120, 64, 171, 170, 170, 63, 85, 85, 181, 64,
};
ASSERT_EQ(answers.size(), output_ptr.size());
for (size_t i = 0; i < output_ptr.size(); ++i) {
EXPECT_EQ(answers[i], output_ptr[i]) << i;
}
}
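The expected values here are the raw little-endian bytes of the fp32 averages. A worked decode of the first four (my arithmetic): feature 0 holds {0, -9, 8, -1, 6, 6, 3, 6, -2}, whose mean is 17 / 9 ≈ 1.888889:
uint32_t bits = 29u | (199u << 8) | (241u << 16) | (63u << 24);  // 0x3FF1C71D
float mean;
std::memcpy(&mean, &bits, sizeof(mean));    // ≈ 1.8888890f; needs <cstring>
// This matches 17 / 9 to within one ulp; the last bit reflects the kernel's
// rounding order rather than float(17.0f / 9.0f).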
TEST(pooling_forward_gpu, basic_max_pooling_int8) {
auto& engine = get_test_engine();
@ -966,7 +825,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
network network(engine, topology);
set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1027,7 +886,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
network network(engine, topology);
@ -1098,7 +957,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 })));
topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
network network(engine, topology);
set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1161,7 +1020,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })));
topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
network network(engine, topology);
@ -1232,7 +1091,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));
topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));
network network(engine, topology);
set_values(input_prim, {
@ -1300,7 +1159,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
network network(engine, topology);
@ -1337,407 +1196,6 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
}
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
// Input : 2x2x3x2
// Argmax : 2x2x2x1
// Output : 2x2x2x1
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("pooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 1);
EXPECT_EQ(output_layout.spatial(0), 2);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}
TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) {
// Input : 2x2x3x2x1
// Argmax : 2x2x2x1x1
// Output : 2x2x2x1x1
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 3, 2, 1 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 2, 1, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 2, 2 }, { 1, 1, 1 }, {0, 0, 0}));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("pooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfzyx);
EXPECT_EQ(output_layout.spatial(2), 1);
EXPECT_EQ(output_layout.spatial(1), 1);
EXPECT_EQ(output_layout.spatial(0), 2);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
// Input : 2x2x3x2
// Argmax : 2x2x2x1
// Output : 2x2x2x1
// Input Padding : 2x2
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("pooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 1);
EXPECT_EQ(output_layout.spatial(0), 2);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
// Input : 2x2x3x2
// Argmax : 2x2x2x1
// Output : 2x2x2x1
// Output Padding : 2x2
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("pooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 1);
EXPECT_EQ(output_layout.spatial(0), 2);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 4.0f, 4.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 8.0f, 16.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
0.0f, 12.0f, 17.0f, 0.0f,
0.0f, 0.0f, 0.0f, 0.0f,
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
}
for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
// Input : 2x2x3x2
// Argmax : 2x2x2x1
// Output : 2x2x2x1
// Forward Max Pooling Input:
// f0: b0: 1 2 -10 b1: 0 0 -11
// f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
// f1: b0: 5 6 -12 b1: 1.5 5.2 -13
// f1: b0: 7 8 16 b1: 12 9 17
//
// Output:
// f0: b0: 4 4 b1: 0.5 0
// f1: b0: 8 16 b1: 12 17
//
// Argmax:
// f0: b0: 4 4 b1: 15 13
// f1: b0: 10 11 b1: 21 23
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
set_values(input, {
1.0f, 2.0f, -10.f,
3.0f, 4.0f, -14.f,
5.0f, 6.0f, -12.f,
7.0f, 8.0f, 16.0f,
0.f, 0.f, -11.f,
0.5f, -0.5f, -15.f,
1.5f, 5.2f, -13.f,
12.f, 9.f, 17.f
});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mutable_data("arg_max", arg_max));
topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, { 2, 2, 2, 1 }));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("pooling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output_layout = output->get_layout();
cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());
EXPECT_EQ(output_layout.format, format::bfyx);
EXPECT_EQ(output_layout.spatial(1), 1);
EXPECT_EQ(output_layout.spatial(0), 2);
EXPECT_EQ(output_layout.feature(), 2);
EXPECT_EQ(output_layout.batch(), 2);
std::vector<float> expected_argmax_vec = {
4.0f, 4.0f,
10.0f, 11.0f,
15.0f, 13.0f,
21.0f, 23.0f
};
std::vector<float> expected_output_vec = {
4.0f, 4.0f,
8.0f, 16.0f,
0.5f, 0.0f,
12.0f, 17.0f
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
}
}
template <class DataType>
static void generic_average_wo_padding_test(format fmt, tensor output, tensor input, ov::Shape window, ov::Strides stride, ov::Shape offset)
{
@ -2259,7 +1717,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor)));
topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 })));
network network(engine, topology);
@ -2332,7 +1790,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x
topology topology;
topology.add(input_layout("input_prim", input_prim->get_layout()));
topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ { 0, 0, 2, 1 } , 0 })));
topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 })));
network network(engine, topology);
@ -2409,7 +1867,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
topology golden_topology;
golden_topology.add(input_layout("input", input_prim->get_layout()));
golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 })));
golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
network golden_network(engine, golden_topology);
golden_network.set_input_data("input", input_prim);
@ -2426,7 +1884,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
topology golden_topology;
golden_topology.add(input_layout("input", input_prim->get_layout()));
golden_topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0, 0, x_in_pad, y_in_pad}, 0 })));
golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
golden_topology.add(reorder("reorder_pooling", "fsv32_pooling", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 })));
network fsv32_network(engine, golden_topology);
@ -2468,15 +1926,8 @@ public:
stride.insert(stride.begin(), stride_z());
pad.insert(pad.begin(), offset_z());
}
-if (global_pooling())
-topo.add(pooling("pool", "input", pool_mode()));
-else
-topo.add(pooling("pool",
-"input",
-pool_mode(),
-kernel,
-stride,
-pad));
+topo.add(pooling("pool", "input", pool_mode(), kernel, stride, pad));
return topo;
}
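Note the builder still passes a single pad shape; presumably (not visible in this diff) the primitive keeps an overload that applies it to both sides:
// Assumed equivalence with the explicit form used elsewhere in the tests:
// pooling("pool", "input", pool_mode(), kernel, stride, pad)
//   == pooling("pool", "input", pool_mode(), kernel, stride,
//              /*pads_begin=*/pad, /*pads_end=*/pad)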
@ -2579,7 +2030,6 @@ public:
size_t offset_x() { return _offset_x; }
size_t offset_y() { return _offset_y; }
size_t offset_z() { return _offset_z; }
-bool global_pooling() { return _global_pooling; }
void set_input(format::type input_fmt, VVVVVF<InputT> input_data) {
_input_fmt = input_fmt;
@ -2604,16 +2054,11 @@ public:
_offset_z = z;
}
-void set_global_pooling(bool global_pooling) {
-_global_pooling = global_pooling;
-}
VVVVVF<InputT> _input;
format::type _input_fmt;
size_t _pool_x, _pool_y, _pool_z;
int _stride_x, _stride_y, _stride_z;
int _offset_x, _offset_y, _offset_z;
-bool _global_pooling;
};
using pooling_random_test_params = std::tuple<
@ -2623,8 +2068,7 @@ using pooling_random_test_params = std::tuple<
std::tuple<size_t, size_t, size_t>, // pool x, y, z
std::tuple<int, int, int>, // stride x, y, z
std::tuple<int, int, int>, // offset x, y, z
-format::type, // input format
-bool // global pooling
+format::type // input format
>;
template <typename InputT, pooling_mode Mode>
@ -2647,8 +2091,7 @@ public:
this->stride_z(),
this->offset_x(),
this->offset_y(),
-this->offset_z(),
-this->global_pooling());
+this->offset_z());
}
}
return reference;
@ -2658,7 +2101,6 @@ public:
size_t b, f, in_x, in_y, in_z, p_x, p_y, p_z;
int s_x, s_y, s_z, o_x, o_y, o_z;
format::type in_fmt;
-bool global_pooling;
std::forward_as_tuple(
b,
@ -2667,24 +2109,16 @@ public:
std::forward_as_tuple(p_x, p_y, p_z),
std::forward_as_tuple(s_x, s_y, s_z),
std::forward_as_tuple(o_x, o_y, o_z),
-in_fmt,
-global_pooling
+in_fmt
) = params;
auto input_data = generate_random_5d<InputT>(b, f, in_z, in_y, in_x, -256, 256);
this->set_input(in_fmt, std::move(input_data));
-if (global_pooling) {
-this->set_pool_size(0, 0, 0);
-this->set_strides(1, 1, 1);
-this->set_offsets(0, 0, 0);
-} else {
this->set_pool_size(p_x, p_y, p_z);
this->set_strides(s_x, s_y, s_z);
this->set_offsets(o_x, o_y, o_z);
-}
-this->set_global_pooling(global_pooling);
}
void run_random(const pooling_random_test_params& params) {
param_set_up(params);
@ -2733,8 +2167,7 @@ INSTANTIATE_TEST_SUITE_P(
format::bfyx,
format::b_fs_yx_fsv4,
format::b_fs_yx_fsv16,
-format::b_fs_yx_fsv32),
-testing::Values(false, true)),
+format::b_fs_yx_fsv32)),
testing::internal::DefaultParamName<pooling_random_test_params>);
INSTANTIATE_TEST_SUITE_P(
@ -2747,8 +2180,7 @@ INSTANTIATE_TEST_SUITE_P(
testing::Values(std::tuple<int, int, int>(2, 2, 2)),
testing::Values(std::tuple<int, int, int>(0, 0, 0)),
testing::Values(format::bfzyx,
-format::b_fs_zyx_fsv16),
-testing::Values(false, true)),
+format::b_fs_zyx_fsv16)),
testing::internal::DefaultParamName<pooling_random_test_params>);
INSTANTIATE_TEST_SUITE_P(
@ -2761,9 +2193,7 @@ INSTANTIATE_TEST_SUITE_P(
testing::Values(std::tuple<size_t, size_t, size_t>(1, 1, 1), std::tuple<size_t, size_t, size_t>(3, 3, 1)),
testing::Values(std::tuple<int, int, int>(1, 1, 1)),
testing::Values(std::tuple<int, int, int>(0, 0, 0)),
-testing::Values(format::bs_fs_yx_bsv16_fsv16),
-testing::Values(false, true)
-),
+testing::Values(format::bs_fs_yx_bsv16_fsv16)),
testing::internal::DefaultParamName<pooling_random_test_params>);
template <typename InputT, pooling_mode Mode>
@ -2851,8 +2281,7 @@ INSTANTIATE_TEST_SUITE_P(
format::b_fs_yx_fsv16,
format::fs_b_yx_fsv32,
format::b_fs_yx_fsv32,
-format::b_fs_yx_fsv4),
-testing::Values(false)),
+format::b_fs_yx_fsv4)),
testing::internal::DefaultParamName<pooling_random_test_params>);
TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
@ -3485,10 +2914,10 @@ public:
all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));
// Output padding
all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));
all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));
// Input + output padding
all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
}
}
}
@ -3545,8 +2974,8 @@ public:
int height = generic_params->input_layouts[0].spatial(1);
int width = generic_params->input_layouts[0].spatial(0);
-auto pad_height = pooling->pad[0];
-auto pad_width = pooling->pad[1];
+auto pad_height = pooling->pads_begin[0];
+auto pad_width = pooling->pads_begin[1];
auto kernel_height = pooling->size[0];
auto kernel_width = pooling->size[1];
@ -3581,8 +3010,8 @@ public:
cldnn::pooling_mode pooling_mode = pooling->mode;
-int pad_width = pooling->pad[1];
-int pad_height = pooling->pad[0];
+int pad_width = pooling->pads_begin[1];
+int pad_height = pooling->pads_begin[0];
int kernel_width = pooling->size[1];
int kernel_height = pooling->size[0];
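These helpers read the y component of size/stride/pads_* at index [0] and the x component at [1]. With split pads the usual extent rule becomes (general formula in my notation, not from the diff):
// out_y = (in_y + pads_begin[0] + pads_end[0] - size[0]) / stride[0] + 1;
// out_x = (in_x + pads_begin[1] + pads_end[1] - size[1]) / stride[1] + 1;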

View File

@ -552,7 +552,8 @@ inline void PrintTupleTo(const std::tuple<std::shared_ptr<test_params>, std::sha
auto pooling = std::static_pointer_cast<cldnn::pooling>(primitive);
std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? "max" : "average";
str << "Pooling mode: " << pooling_mode
<< " Pad x: " << pooling->pad[1] << " Pad y: " << pooling->pad[0]
<< " Pads_begin x: " << pooling->pads_begin[1] << " Pads_begin y: " << pooling->pads_begin[0]
<< " Pads_end x: " << pooling->pads_end[1] << " Pads_end y: " << pooling->pads_end[0]
<< " Stride x: " << pooling->stride[1] << " Stride y: " << pooling->stride[0]
<< " Size x: " << pooling->size[1] << " Size y: " << pooling->size[0];
} else {