[GPU] Pooling refactoring (#13664)
parent: c11389fa18
commit: 2b5dd4fac8
@@ -1,82 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{

/// @brief Performs "max_unpooling" operation.
/// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored.
struct max_unpooling : public primitive_base<max_unpooling> {
    CLDNN_DECLARE_PRIMITIVE(max_unpooling)

    /// @brief Constructs max_unpooling primitive.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// Used only for output size computation.
    /// @param size Pooling kernel size. Used only for output size computation.
    /// @param pad Defines logical pad value added to input tensor. Used only for output size computation.
    max_unpooling(const primitive_id& id,
                  const primitive_id& input,
                  const primitive_id& argmax,
                  const tensor& size,
                  const tensor& stride,
                  const tensor& pad = {0, 0, 0, 0},
                  const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(false) {}

    /// @brief Constructs max_unpooling primitive (with provided output size)
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param output_size User-defined output data size of the primitive (w/o padding).
    max_unpooling(const primitive_id& id,
                  const primitive_id& input,
                  const primitive_id& argmax,
                  tensor output_size,
                  const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          with_output_size(true),
          output_size(output_size) {}

    /// @brief Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    primitive_id argmax;
    /// @brief Defines logical pad value added to input tensor.
    tensor pad;
    /// @brief Defines shift in input buffer between adjacent calculations of output values. Used only for output size computation.
    tensor stride;
    /// @brief Pooling kernel size. Used only for output size computation.
    tensor size;
    /// @brief Indicates that the primitive has user-defined output size (non-zero value). Used only for output size computation.
    bool with_output_size;
    /// @brief User-defined output data size of the primitive (w/o padding).
    tensor output_size;

protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {argmax}; }
};
/// @}
/// @}
/// @}
} // namespace cldnn
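For orientation, a minimal sketch of how the removed primitive pair used to be wired together, based only on the constructors listed above. The ids, the in_layout/argmax_mem objects, and the use of mutable_data for the shared fp32 index buffer are hypothetical illustrations, not code from this commit:

    // Hypothetical sketch: max pooling records flattened fp32 argmax indices in
    // "argmax_buf"; max_unpooling reads them back to scatter values.
    // in_layout (cldnn::layout) and argmax_mem (cldnn::memory) are assumed to be
    // prepared elsewhere.
    topology topo;
    topo.add(input_layout("input", in_layout));
    topo.add(mutable_data("argmax_buf", argmax_mem));
    topo.add(pooling("pool", "input", "argmax_buf", pooling_mode::max_with_argmax,
                     ov::Shape{2, 2}, ov::Strides{2, 2}));
    topo.add(max_unpooling("unpool", "pool", "argmax_buf",
                           tensor{1, 1, 2, 2} /*size*/, tensor{1, 1, 2, 2} /*stride*/));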
@@ -26,8 +26,6 @@ enum class pooling_mode : int32_t {
    average,
    /// @brief Average-pooling method without values which are outside of the input.
    average_no_padding,
    /// @brief Maximum-pooling method with additional buffer to store argmax indices.
    max_with_argmax,
    /// @brief Pooling with bilinear interpolation.
    bilinear,
    /// @brief Deformable pooling with bilinear interpolation.
@@ -51,46 +49,18 @@ struct pooling : public primitive_base<pooling> {
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad = {0, 0},
            const ov::Shape& pads_begin = {0, 0},
            const ov::Shape& pads_end = {0, 0},
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          size(size),
          with_output_size(false),
          pad_end(size.size(), 0) {}
          with_output_size(false) {}

    /// @brief Constructs pooling primitive with argmax.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param mode Pooling mode.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// @param size Pooling kernel size.
    /// @param pad Defines logical pad value added to input tensor
    pooling(const primitive_id& id,
            const primitive_id& input,
            const primitive_id& argmax,
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad = {0, 0},
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(false),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive (computes input paddings to match output size).
    /// @brief Constructs pooling primitive with known output shape.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mode Pooling mode.
@@ -103,68 +73,19 @@ struct pooling : public primitive_base<pooling> {
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad,
            const ov::Shape& pads_begin,
            const ov::Shape& pads_end,
            tensor output_size,
            const data_types output_data_type,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive with argmax (computes input paddings to match output size).
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param mode Pooling mode.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// @param size Pooling kernel size.
    /// @param pad Defines logical pad value added to input tensor.
    /// @param output_size User-defined output data size of the primitive (w/o padding).
    pooling(const primitive_id& id,
            const primitive_id& input,
            const primitive_id& argmax,
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad,
            tensor output_size,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive with kernel size equal to the spatial dimension of input tensor.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mode Pooling mode.
    pooling(const primitive_id& id,
            const primitive_id& input,
            pooling_mode mode,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(true),
          pad({0, 0}),
          stride({1, 1}),
          size({0, 0}),
          with_output_size(false),
          pad_end(size.size(), 0) {}
          output_size(output_size) {}

    /// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output).
    /// @param id This primitive id.
@@ -184,41 +105,35 @@ struct pooling : public primitive_base<pooling> {
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Strides& dilation,
            const ov::Shape& pad,
            const ov::Shape& pad_end,
            const ov::Shape& pads_begin,
            const ov::Shape& pads_end,
            int64_t axis,
            data_types index_element_type,
            tensor output_size,
            const data_types output_data_type,
            const padding& output_padding = padding())
        : primitive_base(id, {input, indices_output}, output_padding, optional_data_type{output_data_type}),
          argmax(""),
          indices_output(indices_output),
          mode(pooling_mode::max),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          dilation(dilation),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(pad_end),
          axis(axis),
          index_element_type(index_element_type),
          maxPoolOpset8Features(true)
    {}
          maxPoolOpset8Features(true) {}

    /// @brief Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    primitive_id argmax;
    /// @brief Primitive id which contains indices output.
    primitive_id indices_output;
    /// @brief Pooling mode.
    pooling_mode mode;
    /// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
    bool global_pooling;
    /// @brief Defines logical pad value added to input tensor.
    ov::Shape pad;
    ov::Shape pads_begin;
    /// @brief Defines a shift, relative to the end of padding shape.
    ov::Shape pads_end;
    /// @brief Defines shift in input buffer between adjacent calculations of output values.
    ov::Strides stride;
    /// @brief Defines index of next pixel to select when pooling
@@ -229,8 +144,6 @@ struct pooling : public primitive_base<pooling> {
    bool with_output_size;
    /// @brief User-defined output data size of the primitive (w/o padding).
    tensor output_size;
    /// @brief Defines a shift, relative to the end of padding shape.
    ov::Shape pad_end;
    /// @brief first dimension of input that should be used to calculate the upper bound of index output
    int64_t axis = 0;
    /// @brief type of index output
@@ -240,8 +153,6 @@ struct pooling : public primitive_base<pooling> {
protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        if (!argmax.empty())
            ret.push_back(argmax);
        if (!indices_output.empty())
            ret.push_back(indices_output);
        return ret;
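A usage note on the refactored signature: callers now pass explicit begin/end padding instead of the removed pad parameter and pad_end member. A hypothetical call site (values invented for illustration, not taken from this commit):

    // 3x3 average pooling, stride 1, symmetric padding of 1.
    auto pool_prim = pooling("pool", "input", pooling_mode::average,
                             ov::Shape{3, 3},    // kernel size
                             ov::Strides{1, 1},  // stride
                             ov::Shape{1, 1},    // pads_begin
                             ov::Shape{1, 1});   // pads_end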
@@ -111,7 +111,7 @@ void prepare_padding::run(program& p) {
            needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(),
                                                                      prim->output_size,
                                                                      size,
                                                                      ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
                                                                      ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
                                                                      prim->stride,
                                                                      ov::Strides(prim->size.size(), 1),
                                                                      false,
@@ -649,11 +649,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
        return data_type_traits::is_i8_u8(in_dt);
    };

    auto pooling_supports_fusings = [](pooling_node& node) -> bool {
        auto pooling_mode = node.get_primitive()->mode;
        return pooling_mode != cldnn::pooling_mode::max_with_argmax;
    };

    auto dts_supports_fusings = [](depth_to_space_node& node) -> bool {
        bool input_conv = node.get_dependency(0).is_type<convolution>();
        bool out_eltw = node.get_users().front()->is_type<eltwise>();
@@ -772,7 +767,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

        should_fuse |= input_data.is_type<gemm>() && gemm_supports_fusings(input_data.as<gemm>());

        should_fuse |= input_data.is_type<pooling>() && pooling_supports_fusings(input_data.as<pooling>());
        should_fuse |= input_data.is_type<pooling>();

        should_fuse |= input_data.is_type<resample>();

@@ -861,8 +856,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
                              _lo.get_optimization_attributes().use_onednn_impls ||
                              (in_dt_is_i8_u8 && out_dt_is_i8_u8));

        should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt() &&
                       pooling_supports_fusings(input_data.as<pooling>());
        should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt();

        should_fuse |= input_data.is_type<fully_connected>() && quantize_node.get_scale_shift_opt();

@@ -960,7 +954,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
                (parents[i]->is_type<gather_elements>()) ||
                (parents[i]->is_type<scatter_nd_update>()) ||
                (parents[i]->is_type<scatter_elements_update>()) ||
                (parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
                (parents[i]->is_type<pooling>()) ||
                (parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
                (parents[i]->is_type<gather>()) ||
                (parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>())) ||
@@ -8,7 +8,6 @@

// ToDo: remove those include with the appropriate code below once we will have support for multiple outputs of a
// primitive
#include "max_unpooling_inst.h"
#include "pooling_inst.h"
#include <vector>
#include <queue>

@@ -27,13 +26,10 @@ void trim_to_outputs::run(program& p) {
    queue.push(&p.get_outputs());

    std::vector<program_node*> special_nodes;
    for (auto& node : p.get_processing_order()) {
        if (node->is_type<input_layout>() ||  // input layout may become disconnected during prior boxes calculations so
                                              // it may have not been marked at this place but we don't want to remove it
            node->is_type<max_unpooling>() ||  // ToDo: remove this after support for multi-outputs in primitives will
                                               // be implemented.
            (node->is_type<pooling>() && node->as<pooling>().get_primitive()->mode == pooling_mode::max_with_argmax))
            special_nodes.push_back(node);
    for (auto& node : p.get_processing_order()) {  // input layout may become disconnected during prior boxes calculations so
        if (node->is_type<input_layout>()) {       // it may have not been marked at this place but we don't want to remove it
            special_nodes.push_back(node);         // ToDo: remove this after support for multi-outputs in primitives will
        }                                          // be implemented.
    }
    queue.push(&special_nodes);
@@ -1,83 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/graph/network.hpp"
#include "kernel_selector_helper.h"
#include "max_unpooling/max_unpooling_kernel_selector.h"
#include "max_unpooling/max_unpooling_kernel_base.h"
#include <vector>

namespace cldnn {
namespace ocl {

struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
    using parent = typed_primitive_impl_ocl<max_unpooling>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<max_unpooling_impl>(*this);
    }

protected:
    kernel_arguments_data get_arguments(typed_primitive_inst<max_unpooling>& instance, int32_t split) const override {
        kernel_arguments_data args = parent::get_arguments(instance, split);
        args.inputs.push_back(instance.dep_memory_ptr(1));
        return args;
    }

public:
    event::ptr execute_impl(const std::vector<event::ptr>& events, max_unpooling_inst& instance) override {
        // clear output buffer
        std::vector<event::ptr> tmp_events(events);
        auto& stream = instance.get_network().get_stream();
        auto ev = instance.output_memory().fill(stream);
        tmp_events.push_back(ev);
        return parent::execute_impl(tmp_events, instance);
    }

    static primitive_impl* create(const max_unpooling_node& arg, const kernel_impl_params& impl_param) {
        auto max_unpooling_params = get_default_params<kernel_selector::max_unpooling_params>(impl_param);
        auto max_unpooling_optional_params =
            get_default_optional_params<kernel_selector::max_unpooling_optional_params>(arg.get_program());

        const auto max_idx = 1;
        max_unpooling_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[max_idx]));

        auto& kernel_selector = kernel_selector::max_unpooling_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(max_unpooling_params, max_unpooling_optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);

        return max_unpool;
    }
};

namespace detail {

attach_max_unpooling_impl::attach_max_unpooling_impl() {
    implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
    });
}

} // namespace detail
} // namespace ocl
} // namespace cldnn
@@ -21,12 +21,10 @@ void validate_args(const pooling_node& arg) {
    auto stride_rank = arg.get_primitive()->stride.size();
    auto window_rank = arg.get_primitive()->size.size();

    if (!arg.get_primitive()->global_pooling) {
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_rank, "output dimensions", output_rank, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_rank, "output dimensions", output_rank, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_rank, "output dimensions", output_rank, "");
    }
}

kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
    switch (mode) {
@@ -36,8 +34,6 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
            return kernel_selector::pool_type::AVG;
        case pooling_mode::average_no_padding:
            return kernel_selector::pool_type::AVG;
        case pooling_mode::max_with_argmax:
            return kernel_selector::pool_type::MAX_WITH_ARGMAX;
        default:
            assert(0);
            return kernel_selector::pool_type::MAX;
@@ -47,8 +43,6 @@
kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode) {
    switch (mode) {
        case pooling_mode::max:
        case pooling_mode::max_with_argmax:
            return kernel_selector::kernel_divider_mode::DONT_CARE;
        case pooling_mode::average:
            return kernel_selector::kernel_divider_mode::FIXED;
        case pooling_mode::average_no_padding:
@@ -71,8 +65,6 @@ struct pooling_impl : typed_primitive_impl_ocl<pooling> {
protected:
    kernel_arguments_data get_arguments(typed_primitive_inst<pooling>& instance, int32_t split) const override {
        kernel_arguments_data args = parent::get_arguments(instance, split);
        if (!instance.argument->argmax.empty())
            args.inputs.push_back(instance.dep_memory_ptr(1));
        return args;
    }

@@ -102,7 +94,9 @@
        }

        const auto& stride = primitive->stride;
        const auto& pad = primitive->pad;
        const auto& pads_begin = primitive->pads_begin;
        const auto& pads_end = primitive->pads_end;

        const auto& dilation = primitive->dilation;
        auto kernel = primitive->size;
        const auto& input_layout = impl_param.input_layouts[0];
@@ -114,19 +108,12 @@
        pp.poolType = cldnn_2_pool_type(primitive->mode);
        pp.remainderAction = kernel_selector::pool_remainder::CEIL;

        if (primitive->global_pooling) {
            kernel = ov::Shape(spatial_rank, 1);
            for (size_t i = 0; i < spatial_rank; i++) {
                kernel[i] = input_layout.spatial(spatial_rank - i - 1);
            }
        }

        // check if last pooling window goes outside of input size + padding. If so the avg pooling size will be
        // adjusted to that, to work properly this calculation must take pad_end into account.
        auto dynamic_mode = false;
        for (size_t i = 0; i < spatial_rank; i++) {
            dynamic_mode |= (((output_layout.spatial(i) - 1) * stride[spatial_rank - i - 1]) + primitive->size[spatial_rank - i - 1]) >
                                (primitive->pad_end[spatial_rank - i - 1] + pad[spatial_rank - i - 1]) + input_layout.spatial(i);
                                (pads_end[spatial_rank - i - 1] + pads_begin[spatial_rank - i - 1]) + input_layout.spatial(i);
        }

        if (primitive->mode == pooling_mode::average && dynamic_mode)
@@ -134,17 +121,14 @@
        else
            pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode);

        if (primitive->mode == pooling_mode::max_with_argmax)
            pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout()));

        uint32_t kernel_z = kernel.size() >= 3 ? kernel[kernel.size() - 3] : 1;
        uint32_t kernel_y = kernel.size() >= 2 ? kernel[kernel.size() - 2] : 1;
        uint32_t kernel_x = kernel.size() >= 1 ? kernel[kernel.size() - 1] : 1;
        pp.poolSize = {kernel_x, kernel_y, kernel_z};

        uint32_t pad_z = std::max<std::ptrdiff_t>(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0);
        uint32_t pad_y = std::max<std::ptrdiff_t>(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0);
        uint32_t pad_x = std::max<std::ptrdiff_t>(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0);
        uint32_t pad_z = std::max<std::ptrdiff_t>(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0);
        uint32_t pad_y = std::max<std::ptrdiff_t>(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0);
        uint32_t pad_x = std::max<std::ptrdiff_t>(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0);
        pp.poolPad = {pad_x, pad_y, pad_z};

        uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
@@ -46,7 +46,6 @@ void register_implementations() {
    REGISTER_OCL(lrn);
    REGISTER_OCL(lstm_gemm);
    REGISTER_OCL(lstm_elt);
    REGISTER_OCL(max_unpooling);
    REGISTER_OCL(mutable_data);
    REGISTER_OCL(mvn);
    REGISTER_OCL(non_max_suppression);

@@ -43,7 +43,6 @@
#include "intel_gpu/primitives/lstm_dynamic.hpp"
#include "intel_gpu/primitives/lstm_dynamic_input.hpp"
#include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp"
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/mvn.hpp"
#include "intel_gpu/primitives/non_max_suppression.hpp"
@@ -127,7 +126,6 @@ REGISTER_OCL(grid_sample);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);
@@ -32,17 +32,12 @@ protected:

        dnnl::memory::dims stride(prim->stride.begin(), prim->stride.end());
        dnnl::memory::dims kernel(prim->size.begin(), prim->size.end());
        dnnl::memory::dims pad_l(prim->pad.begin(), prim->pad.end());
        dnnl::memory::dims pad_r(prim->pad_end.begin(), prim->pad_end.end());
        dnnl::memory::dims pad_l(prim->pads_begin.begin(), prim->pads_begin.end());
        dnnl::memory::dims pad_r(prim->pads_end.begin(), prim->pads_end.end());

        auto input_md = onednn::layout_to_memory_desc(input_layout);
        auto output_md = onednn::layout_to_memory_desc(output_layout);

        if (prim->global_pooling) {
            for (size_t i = 0; i < kernel.size(); i++)
                kernel[i] = input_md.dims()[2 + i];
        }

        for (size_t i = 0; i < kernel.size(); i++) {
            pad_r[i] = (output_md.dims()[2 + i] - 1) * stride[i] - input_md.dims()[2 + i] + kernel[i] - pad_l[i];
        }
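The pad_r loop above inverts the usual pooling size relation output = (input + pad_l + pad_r - kernel) / stride + 1, so the right padding handed to oneDNN always makes the last window end exactly at the output extent. A self-contained sketch of that arithmetic with invented numbers (not OpenVINO code):

    #include <cstdio>

    int main() {
        // Hypothetical 1-D case: input extent 7, window 3, stride 2, left pad 1.
        const int input = 7, kernel = 3, stride = 2, pad_l = 1;
        const int output = (input + pad_l - kernel) / stride + 1;          // 3, with pad_r taken as 0
        const int pad_r = (output - 1) * stride - input + kernel - pad_l;  // -1: last window already fits
        std::printf("output=%d pad_r=%d\n", output, pad_r);
        return 0;
    }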
@@ -1,40 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "primitive_inst.h"

#include <string>
#include <memory>

namespace cldnn {

template <>
struct typed_program_node<max_unpooling> : public typed_program_node_base<max_unpooling> {
    using parent = typed_program_node_base<max_unpooling>;
    typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog);

public:
    using parent::parent;
    program_node& input() const { return get_dependency(0); }
    program_node& argmax() const { return get_dependency(1); }
};

using max_unpooling_node = typed_program_node<max_unpooling>;

template <>
class typed_primitive_inst<max_unpooling> : public typed_primitive_inst_base<max_unpooling> {
    using parent = typed_primitive_inst_base<max_unpooling>;

public:
    typed_primitive_inst(network& network, max_unpooling_node const& desc);
    static layout calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param);
    static std::string to_string(max_unpooling_node const& node);
};

using max_unpooling_inst = typed_primitive_inst<max_unpooling>;

} // namespace cldnn
@@ -1,112 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_inst.h"
#include "primitive_type_base.h"
#include "sliding_window_utils_legacy.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include <memory>

namespace cldnn {
primitive_type_id max_unpooling::type_id() {
    static primitive_type_base<max_unpooling> instance;
    return &instance;
}

max_unpooling_node::typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog)
    : parent(prim, prog) {
    can_share_buffer(false);  // for max_unpooling initial zero values are significant
}

layout max_unpooling_inst::calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param) {
    assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
           "Output data type forcing is not supported for max_unpooling_node!");
    auto desc = impl_param.typed_desc<max_unpooling>();

    auto input_layout = impl_param.get_input_layout(0);
    auto argmax_layout = impl_param.get_input_layout(1);

    CLDNN_ERROR_NOT_EQUAL(desc->id,
                          "Argmax data type",
                          static_cast<size_t>(argmax_layout.data_type),
                          "expected to be fp32",
                          static_cast<size_t>(data_types::f32),
                          "Argmax data type is not fp32.");

    if (desc->with_output_size) {
        tensor output_size(input_layout.batch(),
                           input_layout.feature(),
                           desc->output_size.spatial[0],
                           desc->output_size.spatial[1]);
        return {input_layout.data_type, input_layout.format, output_size};
    }

    auto pad = desc->pad;
    auto stride = desc->stride;
    auto window_size = desc->size;

    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "stride spatial X",
                                   stride.spatial[0],
                                   "",
                                   0,
                                   "Stride spatial X must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "stride spatial Y",
                                   stride.spatial[1],
                                   "",
                                   0,
                                   "Stride spatial Y must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "window size spatial X",
                                   window_size.spatial[0],
                                   "",
                                   0,
                                   "Size X (of pooling window) must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "window size spatial Y",
                                   window_size.spatial[1],
                                   "",
                                   0,
                                   "Size Y (of pooling window) must be positive (>= 1)");

    auto output_range = calc_sliding_window_needed_input_range(input_layout.get_tensor(),
                                                               window_size,
                                                               pad,
                                                               stride,
                                                               {1, 1, 1, 1},
                                                               true,
                                                               1);

    tensor output_size(input_layout.batch(),
                       input_layout.feature(),
                       output_range.spatial[0],
                       output_range.spatial[1]);
    return {input_layout.data_type, input_layout.format, output_size};
}

std::string max_unpooling_inst::to_string(max_unpooling_node const& node) {
    auto desc = node.get_primitive();
    auto node_info = node.desc_to_json();
    auto& input = node.input();
    auto& argmax = node.argmax();

    std::stringstream primitive_description;

    json_composite max_unmax_unpooling_info;
    max_unmax_unpooling_info.add("input", input.id());
    max_unmax_unpooling_info.add("argmax", argmax.id());

    node_info->add("max unmax_unpooling info", max_unmax_unpooling_info);
    node_info->dump(primitive_description);

    return primitive_description.str();
}

max_unpooling_inst::typed_primitive_inst(network& network, max_unpooling_node const& node)
    : parent(network, node) {}

} // namespace cldnn
@@ -22,7 +22,7 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i

    auto input_layout = impl_param.get_input_layout();

    auto pad = desc->pad;
    auto pad = desc->pads_begin;
    auto stride = desc->stride;
    auto window_size = desc->size;

@@ -46,43 +46,6 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i
        }
    }

    if (!desc->argmax.empty())
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Pooling mode",
                              static_cast<size_t>(desc->mode),
                              "should be max_with_argmax",
                              static_cast<size_t>(pooling_mode::max_with_argmax),
                              "Pooling mode should be set to max_with_argmax when argmax primitive is present.");

    if (desc->mode == pooling_mode::max_with_argmax) {
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Argmax primitive",
                              static_cast<size_t>(desc->argmax.empty()),
                              "should not be empty",
                              static_cast<size_t>(0),
                              "Argmax primitive not present despite max_with_argmax mode.");

        auto argmax_layout = impl_param.get_input_layout(1);
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Argmax data type",
                              static_cast<size_t>(argmax_layout.data_type),
                              "expected to be fp32",
                              static_cast<size_t>(data_types::f32),
                              "Argmax data type is not fp32.");
        CLDNN_ERROR_NOT_PROPER_FORMAT(desc->id,
                                      "Input_layout.format",
                                      input_layout.format.value,
                                      "argmax_layout.format",
                                      argmax_layout.format);
    }

    if (desc->global_pooling) {
        window_size = ov::Shape(input_layout.get_spatial_rank(), 1);
        for (size_t i = 0; i < input_layout.get_spatial_rank(); i++) {
            window_size[i] = input_layout.spatial(input_layout.get_spatial_rank() - i - 1);
        }
    }

    uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
    uint32_t stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1;
    uint32_t stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1;
@@ -190,13 +153,10 @@ std::string pooling_inst::to_string(pooling_node const& node) {

    std::stringstream primitive_description;

    bool is_global = desc->global_pooling;

    json_composite pooling_info;
    pooling_info.add("mode", mode);
    pooling_info.add("stride", cldnn::to_string(strd));
    pooling_info.add("kernel size", cldnn::to_string(kernel_size));
    pooling_info.add("is global", is_global ? "true" : "false");
    if (desc->with_output_size) {
        json_composite ud_out_size_info;
        ud_out_size_info.add("size", desc->output_size.to_string());
@@ -327,7 +327,7 @@ bool program::analyze_output_size_handling_need() {
            auto calc_output_range = calc_sliding_window_output_range<swor_mode::exceed_once_data>(
                primInputSize,
                size,
                ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
                ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
                prim->stride,
                ov::Strides(prim->stride.size(), 1),
                true,
@@ -1,44 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

KERNEL(pooling_gpu)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global float* arg_max)
{
#if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF
    const uint x = (uint)get_global_id(0);
    const uint y = (uint)get_global_id(1);
    const uint bf = (uint)get_global_id(2);
    const uint f = bf % INPUT0_FEATURE_NUM;
    const uint b = bf / INPUT0_FEATURE_NUM;

    if (x >= INPUT0_SIZE_X)
    {
        return;
    }
#elif OUTPUT_LAYOUT_YXFB
    const uint x = (uint)get_global_id(1);
    const uint y = (uint)get_global_id(2);
    const uint bf = (uint)get_global_id(0);
    const uint f = bf / INPUT0_BATCH_NUM;
    const uint b = bf % INPUT0_BATCH_NUM;
#endif

    const uint input_id = GET_DATA_INDEX(INPUT0, b, f, y, x);
    const uint arg_max_id = GET_DATA_INDEX(INPUT1, b, f, y, x);
    const uint pool_idx = convert_uint(arg_max[arg_max_id]);

#if OUTPUT_PADDED
    const uint x_output = pool_idx % OUTPUT_SIZE_X;
    const uint y_output = (pool_idx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
    const uint f_output = (pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
    const uint b_output = pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM;

    const uint output_pos = GET_DATA_INDEX(OUTPUT, b_output, f_output, y_output, x_output);
    output[output_pos] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#else
    output[pool_idx] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#endif
}
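For readers without the kernel context: the deleted kernel scatter-adds each pooled value into a zero-filled output at the flat position stored in the fp32 argmax buffer; the host side zero-fills in execute_impl and sets can_share_buffer(false) so those zeros are preserved. An illustrative plain-C++ restatement of that semantics, written for this note rather than taken from OpenVINO:

    #include <cstddef>

    // Reference loop: unpool by scattering each pooled value to the flat index
    // recorded in the fp32 argmax buffer (mirrors output[pool_idx] += input[input_id]).
    void max_unpool_reference(const float* pooled, const float* arg_max, float* unpooled,
                              std::size_t pooled_count, std::size_t unpooled_count) {
        for (std::size_t i = 0; i < unpooled_count; ++i)
            unpooled[i] = 0.0f;  // zero init matters: positions never written must stay 0
        for (std::size_t i = 0; i < pooled_count; ++i)
            unpooled[static_cast<std::size_t>(arg_max[i])] += pooled[i];
    }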
@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_MIN
#elif defined AVG_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@

inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
    return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
    , __global float* arg_max
#endif
#if HAS_FUSED_OPS_DECLS
    , FUSED_OPS_DECLS
#endif
@@ -49,12 +46,6 @@ KERNEL(pooling_gpu)(
    ACCUMULATOR_TYPE max_x[BLOCK_SIZE_Y];
    ACCUMULATOR_TYPE result[POOL_SIZE_Y];

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_x[BLOCK_SIZE_Y] = { 0 };
    uint arg_max_result[POOL_SIZE_Y] = { 0 };
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#endif

    for(uint i = 0; i < BLOCK_SIZE_Y; i++)
    {
        max_x[i] = INIT_VAL;
@@ -65,33 +56,15 @@ KERNEL(pooling_gpu)(
    {
        for(uint i = 0; i < POOL_SIZE_X; i++)
        {

#if MAX_WITH_ARGMAX_POOLING
            if(input[input_idx] > max_x[j])
                arg_max_x[j] = input_idx_bfyx_no_padding;
#endif
            max_x[j] = FUNC_CALL(apply_pooling)(max_x[j], TO_ACCUMULATOR_TYPE(input[input_idx]));
            input_idx += INPUT0_X_PITCH;

#if MAX_WITH_ARGMAX_POOLING
            input_idx_bfyx_no_padding++;
#endif

        }
        input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);

#if MAX_WITH_ARGMAX_POOLING
        input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
    }

    for(uint i = 0; i < POOL_SIZE_Y; i++)
    {
        result[i] = max_x[i * STRIDE_SIZE_Y];

#if MAX_WITH_ARGMAX_POOLING
        arg_max_result[i] = arg_max_x[i * STRIDE_SIZE_Y];
#endif
    }

    // now we do max in "y" dimension
@@ -100,10 +73,6 @@ KERNEL(pooling_gpu)(
        for(uint j = 1; j < POOL_SIZE_Y; j++)
        {

#if MAX_WITH_ARGMAX_POOLING
            if(max_x[j + i * STRIDE_SIZE_Y] > result[i])
                arg_max_result[i] = arg_max_x[j + i * STRIDE_SIZE_Y];
#endif

            result[i] = FUNC_CALL(apply_pooling)(result[i], max_x[j + i * STRIDE_SIZE_Y]);
        }
@@ -111,10 +80,6 @@ KERNEL(pooling_gpu)(

    uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x);

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_pos = GET_DATA_INDEX(INPUT1, b, f, y, x);
#endif

    OUTPUT_TYPE final_result;
    ACTIVATION_TYPE pool_result;

@@ -134,10 +99,6 @@ KERNEL(pooling_gpu)(
#endif
        output[output_pos] = final_result;
        output_pos += OUTPUT_Y_PITCH;
#if MAX_WITH_ARGMAX_POOLING
        arg_max[arg_max_pos] = arg_max_result[i];
        arg_max_pos += INPUT1_Y_PITCH;
#endif
    }
    }
}
@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_MIN
#elif AVG_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@

inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
    return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
    , __global float* arg_max
#endif
#ifdef SELECTED_INDICES_TYPE
    , __global SELECTED_INDICES_TYPE* indices
#endif
@@ -98,10 +95,6 @@ KERNEL(pooling_gpu)(
    uint result_idx = 0;
#endif

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_idx = 0;
#endif

#ifdef CHECK_BOUNDARY
    if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
        offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
@@ -163,19 +156,6 @@ KERNEL(pooling_gpu)(
    const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
#endif
#endif

#if MAX_WITH_ARGMAX_POOLING
    if(input[input_idx] > result)
    {
#if OUTPUT_DIMS < 5
        const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
        const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y *
                                               (input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b)));
#endif
        arg_max_idx = input_idx_bfyx_no_padding;
    }
#endif
    const ACCUMULATOR_TYPE casted_input = TO_ACCUMULATOR_TYPE(input[input_idx]);
#ifdef SELECTED_INDICES_TYPE
    if (casted_input > result)
@@ -220,14 +200,6 @@ KERNEL(pooling_gpu)(
    uint input_idx = INPUT0_GET_INDEX(b, f, offset_y, offset_x);
#endif

#if MAX_WITH_ARGMAX_POOLING
#if OUTPUT_DIMS < 5
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (offset_z + INPUT0_SIZE_Z *(f + INPUT0_FEATURE_NUM * b)));
#endif
#endif

#if OUTPUT_DIMS == 5
    for(uint l = 0; l < POOL_SIZE_Z; l++)
    {
@@ -236,11 +208,6 @@ KERNEL(pooling_gpu)(
        {
            for(uint i = 0; i < POOL_SIZE_X; i++)
            {
#if MAX_WITH_ARGMAX_POOLING
                if(input[input_idx] > result)
                    arg_max_idx = input_idx_bfyx_no_padding;
#endif

#if OUTPUT_DIMS == 5
#if !INPUT0_SIMPLE
                uint input_idx = INPUT0_GET_INDEX(b, f, offset_z + l, offset_y + j, offset_x + i);
@@ -267,21 +234,11 @@ KERNEL(pooling_gpu)(
                input_idx += INPUT0_X_PITCH;
#endif
#endif

#if MAX_WITH_ARGMAX_POOLING
                input_idx_bfyx_no_padding++;
#endif
            }
            input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);
#if MAX_WITH_ARGMAX_POOLING
            input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
        }
#if OUTPUT_DIMS == 5 // 3D
        input_idx += (INPUT0_Z_PITCH - POOL_SIZE_Y*INPUT0_Y_PITCH);
#if MAX_WITH_ARGMAX_POOLING
        input_idx_bfyx_no_padding += (INPUT0_SIZE_Y - POOL_SIZE_Y);
#endif
    }
#endif

@@ -322,12 +279,6 @@ KERNEL(pooling_gpu)(
#endif
    indices[output_pos] = TO_SELECTED_INDICES_TYPE(result_idx);
#endif

#if MAX_WITH_ARGMAX_POOLING
    //INPUT1 macro stands for Argmax
    const uint arg_max_pos = GET_DATA_INDEX_5D(INPUT1, b, f, z, y, x);
    arg_max[arg_max_pos] = convert_float(arg_max_idx);
#endif
}

#undef INIT_VAL
@@ -38,7 +38,6 @@ enum class KernelType {
    RESAMPLE,
    REGION_YOLO,
    REORG_YOLO,
    MAX_UNPOOLING,
    MVN,
    LSTM_GEMM,
    LSTM_ELT,
@@ -184,7 +183,6 @@ enum class ActivationFunction {
enum class PoolType {
    MAX,
    AVG,
    MAX_WITH_ARGMAX,
    BILINEAR,
    DEFORMABLE_BILINEAR
};

@@ -242,7 +242,6 @@ std::string toString(PoolType mode) {
    switch (mode) {
        case PoolType::MAX: return "MAX";
        case PoolType::AVG: return "AVG";
        case PoolType::MAX_WITH_ARGMAX: return "MAX_WITH_ARGMAX";
        case PoolType::BILINEAR: return "BILINEAR";
        case PoolType::DEFORMABLE_BILINEAR: return "DEFORMABLE_BILINEAR";
        default: return "";

@@ -213,9 +213,6 @@ void ParamsKey::EnablePoolType(PoolType t) {
        case PoolType::AVG:
            key.restrict.val.dedicated.pooling.avg = 1;
            break;
        case PoolType::MAX_WITH_ARGMAX:
            key.restrict.val.dedicated.pooling.max_with_argmax = 1;
            break;
        case PoolType::BILINEAR:
            key.restrict.val.dedicated.pooling.bilinear = 1;
            break;

@@ -106,7 +106,6 @@ public:
    uint32_t max : 1;
    uint32_t avg : 1;
    uint32_t floor : 1;
    uint32_t max_with_argmax : 1;
    uint32_t ceil : 1;
    uint32_t bilinear : 1;
    uint32_t deformable_bilinear : 1;
@@ -1,77 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_base.h"
#include <algorithm>

namespace kernel_selector {
bool MaxUnpoolingKernelBase::Validate(const Params& p, const optional_params& o) const {
    if (p.GetType() != KernelType::MAX_UNPOOLING || o.GetType() != KernelType::MAX_UNPOOLING) {
        return false;
    }

    return true;
}

JitConstants MaxUnpoolingKernelBase::GetJitConstants(const max_unpooling_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);
    if (params.outputs[0].PitchesDifferFromLogicalDims())
        jit.AddConstant(MakeJitConstant("OUTPUT_PADDED", 1));
    return jit;
}

MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const {
    const auto& input = params.inputs[0];

    DispatchData dispatchData;

    if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
        // Determine global work sizes.
        dispatchData.gws[2] = input.Batch().v * input.Feature().v;  // B, F
        dispatchData.gws[0] = Align(input.X().v, 32);               // X
        dispatchData.gws[1] = input.Y().v;                          // Y

        dispatchData.lws[0] = 32;
        dispatchData.lws[1] = 1;
        dispatchData.lws[2] = 1;
    } else {
        // Determine global work sizes.
        dispatchData.gws[0] = input.Batch().v * input.Feature().v;  // B, F
        dispatchData.gws[1] = input.X().v;                          // X
        dispatchData.gws[2] = input.Y().v;                          // Y

        dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
        while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
            --dispatchData.lws[0];
        }
        dispatchData.lws[1] = 1;
        dispatchData.lws[2] = 1;
    }

    return dispatchData;
}

KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
                                                         const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    const max_unpooling_params& orgParams = static_cast<const max_unpooling_params&>(params);

    DispatchData dispatchData = SetDefault(orgParams);

    KernelData kd = KernelData::Default<max_unpooling_params>(params);

    auto cldnn_jit = GetJitConstants(orgParams);
    auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
    auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

    auto& kernel = kd.kernels[0];
    FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
    kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});

    return {kd};
}
} // namespace kernel_selector
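The local-work-size selection in SetDefault above simply counts down from a cap of 32 until it divides gws[0], i.e. it picks the largest local size not exceeding 32 that evenly tiles the global size. A standalone illustration with invented numbers:

    #include <cstdio>
    #include <cstddef>

    int main() {
        std::size_t gws0 = 48, lws0 = 32;  // hypothetical global size; 32 is the cap
        while (gws0 % lws0 != 0)           // 32 down through 25 do not divide 48
            --lws0;                        // stops at 24, the largest divisor of 48 that is <= 32
        std::printf("lws0=%zu\n", lws0);   // prints lws0=24
        return 0;
    }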
@@ -1,43 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_base_opencl.h"
#include "kernel_selector_params.h"

namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_params : public base_params {
    max_unpooling_params() : base_params(KernelType::MAX_UNPOOLING) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_optional_params : optional_params {
    max_unpooling_optional_params() : optional_params(KernelType::MAX_UNPOOLING) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MaxUnpoolingKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class MaxUnpoolingKernelBase : public KernelBaseOpenCL {
public:
    using KernelBaseOpenCL::KernelBaseOpenCL;
    virtual ~MaxUnpoolingKernelBase() {}

    struct DispatchData : public CommonDispatchData {
        bool needsBoundary = false;
    };

protected:
    bool Validate(const Params&, const optional_params&) const override;
    virtual JitConstants GetJitConstants(const max_unpooling_params& params) const;
    virtual DispatchData SetDefault(const max_unpooling_params& params) const;
    KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
};
} // namespace kernel_selector
@@ -1,36 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_gpu_ref.h"

namespace kernel_selector {
ParamsKey MaxUnpoolingKernelGPURef::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::yxfb);
    k.EnableInputLayout(DataLayout::byxf);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::yxfb);
    k.EnableOutputLayout(DataLayout::byxf);
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBatching();
    k.EnableDifferentTypes();
    return k;
}

KernelsData MaxUnpoolingKernelGPURef::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetCommonKernelsData(params, options);
}

KernelsPriority MaxUnpoolingKernelGPURef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
    return FORCE_PRIORITY_9;
}
} // namespace kernel_selector
@@ -1,19 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "max_unpooling_kernel_base.h"

namespace kernel_selector {
class MaxUnpoolingKernelGPURef : public MaxUnpoolingKernelBase {
public:
    MaxUnpoolingKernelGPURef() : MaxUnpoolingKernelBase("max_unpooling_gpu_ref") {}
    virtual ~MaxUnpoolingKernelGPURef() {}

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector
@@ -1,15 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_selector.h"
#include "max_unpooling_kernel_gpu_ref.h"

namespace kernel_selector {

max_unpooling_kernel_selector::max_unpooling_kernel_selector() { Attach<MaxUnpoolingKernelGPURef>(); }

KernelsData max_unpooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::MAX_UNPOOLING);
}
} // namespace kernel_selector
@@ -1,23 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_selector.h"

namespace kernel_selector {
class max_unpooling_kernel_selector : public kernel_selector_base {
public:
    static max_unpooling_kernel_selector& Instance() {
        static max_unpooling_kernel_selector instance_;
        return instance_;
    }

    max_unpooling_kernel_selector();

    virtual ~max_unpooling_kernel_selector() {}

    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
@ -203,8 +203,6 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
|
||||
GetFusedPrimitiveInputsCount(params));
|
||||
uint32_t param_idx = 1;
|
||||
if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
|
||||
if (orgParams.maxPoolOpset8Features) {
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
|
@ -20,7 +20,6 @@ ParamsKey PoolingKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnablePoolType(PoolType::MAX);
|
||||
k.EnablePoolType(PoolType::AVG);
|
||||
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
|
||||
k.EnablePoolRemainder(PoolRemainder::FLOOR);
|
||||
k.EnablePoolRemainder(PoolRemainder::CEIL);
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
|
||||
|
@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPUBfyxBlockOpt::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnablePoolType(PoolType::MAX);
|
||||
k.EnablePoolType(PoolType::AVG);
|
||||
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
|
||||
k.EnablePoolRemainder(PoolRemainder::FLOOR);
|
||||
k.EnablePoolRemainder(PoolRemainder::CEIL);
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
|
||||
|
@ -90,8 +90,8 @@ bool PoolingKernelGPUInt8Ref::Validate(const Params& params, const optional_para

    if (p.inputs[0].GetDType() == Datatype::INT8 || p.inputs[0].GetDType() == Datatype::UINT8) {
        // Max pooling doesn't change quantization ranges, so output data type should be the same as input
        if ((p.poolType == PoolType::MAX || p.poolType == PoolType::MAX_WITH_ARGMAX)
            && (p.outputs[0].GetDType() != p.inputs[0].GetDType()) && p.quantization == QuantizationType::NONE)
        if (p.poolType == PoolType::MAX && p.outputs[0].GetDType() != p.inputs[0].GetDType() &&
            p.quantization == QuantizationType::NONE)
            return false;
        // Average pooling should produce FP by default. (u)int8 is possible when quantize op is fused.
        // if (p.poolType == PoolType::AVG &&
@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const {
    k.EnableBatching();
    k.EnablePoolType(PoolType::MAX);
    k.EnablePoolType(PoolType::AVG);
    k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
    k.EnablePoolRemainder(PoolRemainder::FLOOR);
    k.EnablePoolRemainder(PoolRemainder::CEIL);
    k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
@ -36,9 +36,9 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Av
                           kernel,
                           strides,
                           pads_begin,
                           pads_end,
                           tensor_from_dims(op->get_output_shape(0)),
                           cldnn::element_type_to_data_type(op->get_output_element_type(0)));
    poolPrim.pad_end = pads_end;
    p.add_primitive(*op, poolPrim);
}
@ -64,9 +64,9 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
                           kernel,
                           strides,
                           pads_begin,
                           pads_end,
                           tensor_from_dims(op->get_output_shape(0)),
                           cldnn::element_type_to_data_type(op->get_output_element_type(0)));
    poolPrim.pad_end = pads_end;
    p.add_primitive(*op, poolPrim);
}
@ -148,11 +148,12 @@ TEST_P(pooling_f32_activation, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
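// Reading of the change above (inferred from this diff, not quoted from the
// refactored header): the pooling primitive drops the single symmetric `pad`
// argument in favour of explicit begin/end paddings, i.e.
//
//   ov::Shape pads_begin(r, 1);
//   ov::Shape pads_end(r, 1);
//   pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end);
//
// which mirrors the OpenVINO pooling ops, where pads_begin and pads_end may
// differ (e.g. with CEIL rounding) — something one shared pad value could not
// express. The same substitution repeats throughout the hunks below.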
@ -185,12 +186,13 @@ TEST_P(pooling_f32_scale, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        reorder("output_reorder", "scale", format::bfyx, data_types::f32)
    );
@ -205,12 +207,13 @@ TEST_P(pooling_f32_scale, fp16_scale_out) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, data_types::f16),
        reorder("output_reorder", "scale", format::bfyx, data_types::f32)
    );
@ -241,7 +244,8 @@ TEST_P(pooling_scale_activation_quantize, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -250,7 +254,7 @@ TEST_P(pooling_scale_activation_quantize, basic) {
        data("out_lo", get_mem(get_single_element_layout(p), 0)),
        data("out_hi", get_mem(get_single_element_layout(p), 255)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@ -267,7 +271,8 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -276,7 +281,7 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
        data("out_lo", get_mem(get_single_element_layout(p), -127, 127)),
        data("out_hi", get_mem(get_single_element_layout(p), -127, 127)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
@ -293,7 +298,8 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -302,7 +308,7 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
        data("out_lo", get_mem(get_single_element_layout(p), 0)),
        data("out_hi", get_mem(get_single_element_layout(p), 255)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::hyperbolic_tan),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@ -362,12 +368,13 @@ TEST_P(pooling_scale_activation, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        reorder("output_reorder", "activation", p.default_format, data_types::f32)
@ -383,12 +390,13 @@ TEST_P(pooling_scale_activation, eltwise_mul) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p))),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        reorder("output_reorder", "activation", p.default_format, data_types::f32)
@ -577,11 +585,12 @@ TEST_P(pooling_onednn_activation1, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
@ -597,10 +606,12 @@ TEST_P(pooling_onednn_activation2, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
@ -1,442 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////

#include "test_utils.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/max_unpooling.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
#include <intel_gpu/primitives/pooling.hpp>

using namespace cldnn;
using namespace ::tests;

TEST(max_unpooling_gpu, basic_in2x3x2x2) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17
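    // A worked decode of the argmax values above (illustrative, not part of
    // the original test): each argmax entry is a flat bfyx offset into the
    // forward-pooling input of shape b=2, f=2, y=2, x=3. For the value 21:
    //
    //   size_t idx = 21;
    //   size_t x =  idx % 3;              // -> 0
    //   size_t y = (idx / 3) % 2;         // -> 1
    //   size_t f = (idx / (3 * 2)) % 2;   // -> 1
    //   size_t b =  idx / (3 * 2 * 2);    // -> 1
    //
    // so the max value 12 is scattered back to b1/f1, row 1, column 0 — exactly
    // where it appears in the expected output above.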
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Output Padding : 0x0x1x1

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 4.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 8.f, 16.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.5f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 12.f, 0.f, 17.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
    };
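    // Layout check for the expected data above (derived from the test, stated
    // here for clarity): with output padding {0, 0, 1, 1} the 3x2 spatial
    // plane is embedded in a (3 + 2*1) x (2 + 2*1) = 5x4 buffer, so the vector
    // holds 2 (batch) * 2 (feature) * 4 * 5 = 80 elements — zeros in the halo
    // and the unpooled values at the interior offsets.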
    std::vector<float> out;
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        out.push_back(output_ptr[i]);
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Output size explicitly provided

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", {2, 2, 3, 2}));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Input values in fp16

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        FLOAT16(4.0f), FLOAT16(4.0f),
        FLOAT16(8.0f), FLOAT16(16.0f),
        FLOAT16(0.5f), FLOAT16(0.0f),
        FLOAT16(12.0f), FLOAT16(17.0f)
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
    }
}

TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Forward Max Pooling Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Unpooling output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling_max_with_argmax", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));
    topology.add(max_unpooling("max_unpooling", "pooling_max_with_argmax", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }

    for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
@ -125,18 +125,12 @@ VVVF<typename pooling_mode_output<InputT, Mode>::type> reference_pooling(const V
                        int stride_z,
                        int offset_x,
                        int offset_y,
                        int offset_z,
                        bool global_pooling) {
                        int offset_z) {
    using output_t = typename pooling_mode_output<InputT, Mode>::type;
    VVVF<output_t> result;
    auto size_x = input[0][0].size();
    auto size_y = input[0].size();
    auto size_z = input.size();
    if (global_pooling) {
        pool_z = size_z;
        pool_y = size_y;
        pool_x = size_x;
    }

    auto accumulator = pooling_accumulator<InputT, Mode>();
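// Context for the removal above (a sketch reflecting the deleted branch, not
// new API): global pooling was simply regular pooling with the window
// stretched over the whole spatial extent, so the reference can still express
// it without a dedicated flag by passing
//
//   pool_x = size_x;  // input[0][0].size()
//   pool_y = size_y;  // input[0].size()
//   pool_z = size_z;  // input.size()
//
// together with stride 1 and zero offsets.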
@ -279,141 +273,6 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
    EXPECT_EQ(2.0f, output_ptr[0]);
}

TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: max
    // Global pooling: true
    // Padding: none
    //
    // Input data:
    // [-0.5, 1.0, 0.5]
    // [ 2.0, 1.5, -0.5]
    // [ 0.0, -1.0, 0.5]
    //
    // Expected output:
    // [ 2.0]

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));

    network network(engine, topology);
    set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());

    EXPECT_EQ(2.0f, output_ptr[0]);
}

TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: max
    // Global pooling: true
    // Padding: none

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));

    network network(engine, topology);
    std::vector<char> vals = {
        0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
        -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
        8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
        -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
        6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
        6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
        3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
        6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
        -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
    };
    set_values(input_prim, vals);
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<char> output_ptr(output_prim, get_test_stream());

    std::vector<char> answers = { 8, 44, 8, 81, 64, 8, 12, 66, 14, 8, 99, 64, 8, 11, 18, 21 };

    ASSERT_EQ(answers.size(), output_ptr.size());
    for (size_t i = 0; i < output_ptr.size(); ++i) {
        EXPECT_EQ(answers[i], output_ptr[i]);
    }
}

TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: avg
    // Global pooling: true
    // Padding: none

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average));

    network network(engine, topology);
    std::vector<char> vals = {
        0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
        -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
        8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
        -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
        6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
        6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
        3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
        6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
        -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
    };
    set_values(input_prim, vals);
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<uint8_t> output_ptr(output_prim, get_test_stream());

    std::vector<uint8_t> answers = {
        29, 199, 241, 63, 85, 85, 213, 64, 85, 85, 21, 64, 142, 227, 8, 65,
        57, 142, 19, 65, 171, 170, 170, 62, 57, 142, 35, 64, 0, 0, 32, 65,
        199, 113, 28, 64, 29, 199, 241, 63, 29, 199, 153, 65, 57, 142, 83, 65,
        228, 56, 14, 63, 142, 227, 120, 64, 171, 170, 170, 63, 85, 85, 181, 64,
    };
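    // Note on the expected values above (an observation, not original
    // commentary): the output is locked as uint8_t, so each group of four
    // bytes is the little-endian bit pattern of one fp32 average. For example
    // the first group {29, 199, 241, 63} is 0x3FF1C71D, i.e. roughly 1.8889f,
    // consistent with an average of nine small int8 inputs.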

    ASSERT_EQ(answers.size(), output_ptr.size());
    for (size_t i = 0; i < output_ptr.size(); ++i) {
        EXPECT_EQ(answers[i], output_ptr[i]) << i;
    }
}

TEST(pooling_forward_gpu, basic_max_pooling_int8) {

    auto& engine = get_test_engine();
@ -966,7 +825,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));

    network network(engine, topology);
    set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1027,7 +886,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1098,7 +957,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));

    network network(engine, topology);
    set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1161,7 +1020,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1232,7 +1091,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));

    network network(engine, topology);
    set_values(input_prim, {
@ -1300,7 +1159,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1337,407 +1196,6 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) {
    // Input : 2x2x3x2x1
    // Argmax : 2x2x2x1x1
    // Output : 2x2x2x2x1

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 3, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 2, 1, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 2, 2 }, { 1, 1, 1 }, {0, 0, 0}));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfzyx);
    EXPECT_EQ(output_layout.spatial(2), 1);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2
    // Input Padding : 2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2
    // Output Padding : 2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 4.0f, 4.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 8.0f, 16.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.5f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 12.0f, 17.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }

    for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, { 2, 2, 2, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}

template <class DataType>
static void generic_average_wo_padding_test(format fmt, tensor output, tensor input, ov::Shape window, ov::Strides stride, ov::Shape offset)
{
@ -2259,7 +1717,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor)));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 })));

    network network(engine, topology);
@ -2332,7 +1790,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ { 0, 0, 2, 1 } , 0 })));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 })));

    network network(engine, topology);
@ -2409,7 +1867,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
    topology golden_topology;
    golden_topology.add(input_layout("input", input_prim->get_layout()));
    golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 })));
    golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));

    network golden_network(engine, golden_topology);
    golden_network.set_input_data("input", input_prim);
@ -2426,7 +1884,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
    topology golden_topology;
    golden_topology.add(input_layout("input", input_prim->get_layout()));
    golden_topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0, 0, x_in_pad, y_in_pad}, 0 })));
    golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(reorder("reorder_pooling", "fsv32_pooling", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 })));

    network fsv32_network(engine, golden_topology);
@ -2468,15 +1926,8 @@ public:
            stride.insert(stride.begin(), stride_z());
            pad.insert(pad.begin(), offset_z());
        }
        if (global_pooling())
            topo.add(pooling("pool", "input", pool_mode()));
        else
            topo.add(pooling("pool",
                             "input",
                             pool_mode(),
                             kernel,
                             stride,
                             pad));

        topo.add(pooling("pool", "input", pool_mode(), kernel, stride, pad));
        return topo;
    }
@ -2579,7 +2030,6 @@ public:
    size_t offset_x() { return _offset_x; }
    size_t offset_y() { return _offset_y; }
    size_t offset_z() { return _offset_z; }
    bool global_pooling() { return _global_pooling; }

    void set_input(format::type input_fmt, VVVVVF<InputT> input_data) {
        _input_fmt = input_fmt;
@ -2604,16 +2054,11 @@ public:
        _offset_z = z;
    }

    void set_global_pooling(bool global_pooling) {
        _global_pooling = global_pooling;
    }

    VVVVVF<InputT> _input;
    format::type _input_fmt;
    size_t _pool_x, _pool_y, _pool_z;
    int _stride_x, _stride_y, _stride_z;
    int _offset_x, _offset_y, _offset_z;
    bool _global_pooling;
};

using pooling_random_test_params = std::tuple<
@ -2623,8 +2068,7 @@ using pooling_random_test_params = std::tuple<
    std::tuple<size_t, size_t, size_t>, // pool x, y, z
    std::tuple<int, int, int>,          // stride x, y, z
    std::tuple<int, int, int>,          // offset x, y, z
    format::type,                       // input format
    bool                                // global pooling
    format::type                        // input format
>;

template <typename InputT, pooling_mode Mode>
@ -2647,8 +2091,7 @@ public:
                    this->stride_z(),
                    this->offset_x(),
                    this->offset_y(),
                    this->offset_z(),
                    this->global_pooling());
                    this->offset_z());
            }
        }
        return reference;
@ -2658,7 +2101,6 @@ public:
        size_t b, f, in_x, in_y, in_z, p_x, p_y, p_z;
        int s_x, s_y, s_z, o_x, o_y, o_z;
        format::type in_fmt;
        bool global_pooling;

        std::forward_as_tuple(
            b,
@ -2667,24 +2109,16 @@ public:
            std::forward_as_tuple(p_x, p_y, p_z),
            std::forward_as_tuple(s_x, s_y, s_z),
            std::forward_as_tuple(o_x, o_y, o_z),
            in_fmt,
            global_pooling
            in_fmt
        ) = params;

        auto input_data = generate_random_5d<InputT>(b, f, in_z, in_y, in_x, -256, 256);

        this->set_input(in_fmt, std::move(input_data));
        if (global_pooling) {
            this->set_pool_size(0, 0, 0);
            this->set_strides(1, 1, 1);
            this->set_offsets(0, 0, 0);
        } else {
            this->set_pool_size(p_x, p_y, p_z);
            this->set_strides(s_x, s_y, s_z);
            this->set_offsets(o_x, o_y, o_z);
        }
        this->set_global_pooling(global_pooling);
    }

    void run_random(const pooling_random_test_params& params) {
        param_set_up(params);
@ -2733,8 +2167,7 @@ INSTANTIATE_TEST_SUITE_P(
        format::bfyx,
        format::b_fs_yx_fsv4,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32),
        testing::Values(false, true)),
        format::b_fs_yx_fsv32)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

INSTANTIATE_TEST_SUITE_P(
@ -2747,8 +2180,7 @@ INSTANTIATE_TEST_SUITE_P(
    testing::Values(std::tuple<int, int, int>(2, 2, 2)),
    testing::Values(std::tuple<int, int, int>(0, 0, 0)),
    testing::Values(format::bfzyx,
                    format::b_fs_zyx_fsv16),
    testing::Values(false, true)),
                    format::b_fs_zyx_fsv16)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

INSTANTIATE_TEST_SUITE_P(
@ -2761,9 +2193,7 @@ INSTANTIATE_TEST_SUITE_P(
    testing::Values(std::tuple<size_t, size_t, size_t>(1, 1, 1), std::tuple<size_t, size_t, size_t>(3, 3, 1)),
    testing::Values(std::tuple<int, int, int>(1, 1, 1)),
    testing::Values(std::tuple<int, int, int>(0, 0, 0)),
    testing::Values(format::bs_fs_yx_bsv16_fsv16),
    testing::Values(false, true)
    ),
    testing::Values(format::bs_fs_yx_bsv16_fsv16)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

template <typename InputT, pooling_mode Mode>
@ -2851,8 +2281,7 @@ INSTANTIATE_TEST_SUITE_P(
        format::b_fs_yx_fsv16,
        format::fs_b_yx_fsv32,
        format::b_fs_yx_fsv32,
        format::b_fs_yx_fsv4),
        testing::Values(false)),
        format::b_fs_yx_fsv4)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
@ -3485,10 +2914,10 @@ public:
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));

                // Output padding
                all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));
                all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));

                // Input + output padding
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
            }
        }
    }
@ -3545,8 +2974,8 @@ public:
        int height = generic_params->input_layouts[0].spatial(1);
        int width = generic_params->input_layouts[0].spatial(0);

        auto pad_height = pooling->pad[0];
        auto pad_width = pooling->pad[1];
        auto pad_height = pooling->pads_begin[0];
        auto pad_width = pooling->pads_begin[1];

        auto kernel_height = pooling->size[0];
        auto kernel_width = pooling->size[1];
@ -3581,8 +3010,8 @@ public:

        cldnn::pooling_mode pooling_mode = pooling->mode;

        int pad_width = pooling->pad[1];
        int pad_height = pooling->pad[0];
        int pad_width = pooling->pads_begin[1];
        int pad_height = pooling->pads_begin[0];

        int kernel_width = pooling->size[1];
        int kernel_height = pooling->size[0];
@ -552,7 +552,8 @@ inline void PrintTupleTo(const std::tuple<std::shared_ptr<test_params>, std::sha
        auto pooling = std::static_pointer_cast<cldnn::pooling>(primitive);
        std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? "max" : "average";
        str << "Pooling mode: " << pooling_mode
            << " Pad x: " << pooling->pad[1] << " Pad y: " << pooling->pad[0]
            << " Pads_begin x: " << pooling->pads_begin[1] << " Pads_begin y: " << pooling->pads_begin[0]
            << " Pads_end x: " << pooling->pads_end[1] << " Pads_end y: " << pooling->pads_end[0]
            << " Stride x: " << pooling->stride[1] << " Stride y: " << pooling->stride[0]
            << " Size x: " << pooling->size[1] << " Size y: " << pooling->size[0];
    } else {