[GPU] MaxPool-8 (#9064)
This commit is contained in:
@@ -164,9 +164,54 @@ struct pooling : public primitive_base<pooling> {
|
||||
size(0, 0, 0, 0),
|
||||
with_output_size(false) {}
|
||||
|
||||
/// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output).
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
/// @param indices_output Indices output primitive id.
|
||||
/// @param size Pooling kernel size.
|
||||
/// @param stride Defines shift in input buffer between adjacent calculations of output values.
|
||||
/// @param dilation Defines index of next pixel to select when pooling.
|
||||
/// @param pad Defines logical pad value added to input tensor.
|
||||
/// @param pad_end Defines a shift, relative to the end of padding shape.
|
||||
/// @param axis First dimension of input that should be used to calculate the upper bound of index output.
|
||||
/// @param index_element_type Data type of index output.
|
||||
/// @param output_size User-defined output data size of the primitive (w/o padding).
|
||||
pooling(const primitive_id& id,
|
||||
const primitive_id& input,
|
||||
const primitive_id& indices_output,
|
||||
const tensor& size,
|
||||
const tensor& stride,
|
||||
const tensor& dilation,
|
||||
const tensor& pad,
|
||||
const tensor& pad_end,
|
||||
int64_t axis,
|
||||
data_types index_element_type,
|
||||
tensor output_size,
|
||||
const data_types output_data_type,
|
||||
const primitive_id& ext_prim_id = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input, indices_output}, ext_prim_id, output_padding, optional_data_type{output_data_type}),
|
||||
argmax(""),
|
||||
indices_output(indices_output),
|
||||
mode(pooling_mode::max),
|
||||
global_pooling(false),
|
||||
pad(pad),
|
||||
stride(stride),
|
||||
dilation(dilation),
|
||||
size(size),
|
||||
with_output_size(true),
|
||||
output_size(output_size),
|
||||
pad_end(pad_end),
|
||||
axis(axis),
|
||||
index_element_type(index_element_type),
|
||||
maxPoolOpset8Features(true)
|
||||
{}
|
||||
|
||||
/// @brief Primitive id which contains indices of each max pooling region.
|
||||
/// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
|
||||
primitive_id argmax;
|
||||
/// @brief Primitive id which contains indices output.
|
||||
primitive_id indices_output;
|
||||
/// @brief Pooling mode.
|
||||
pooling_mode mode;
|
||||
/// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
|
||||
@@ -175,6 +220,8 @@ struct pooling : public primitive_base<pooling> {
|
||||
tensor pad;
|
||||
/// @brief Defines shift in input buffer between adjacent calculations of output values.
|
||||
tensor stride;
|
||||
/// @brief Defines the step between the input elements selected by the pooling kernel (dilation).
|
||||
tensor dilation;
|
||||
/// @brief Pooling kernel size.
|
||||
tensor size;
|
||||
/// @brief Indicates that the primitive has user-defined output size (non-zero value).
|
||||
@@ -183,12 +230,20 @@ struct pooling : public primitive_base<pooling> {
|
||||
tensor output_size;
|
||||
/// @brief Defines a shift, relative to the end of padding shape.
|
||||
tensor pad_end;
|
||||
/// @brief First dimension of the input that is used to calculate the upper bound of the indices output.
|
||||
int64_t axis;
|
||||
/// @brief Data type of the indices output.
|
||||
data_types index_element_type;
|
||||
bool maxPoolOpset8Features{false};
|
||||
|
||||
protected:
|
||||
/// @brief Collects the auxiliary primitive ids this pooling depends on.
/// @return References to @c argmax and/or @c indices_output, in that order, for whichever
///         of the two ids is non-empty; an empty vector when both are empty.
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
    // The former early returns (`return {}` / `return {argmax}`) made the code below
    // unreachable, so indices_output was never reported. Both ids are now handled uniformly.
    std::vector<std::reference_wrapper<const primitive_id>> ret;
    if (!argmax.empty())
        ret.push_back(argmax);
    if (!indices_output.empty())
        ret.push_back(indices_output);
    return ret;
}
|
||||
};
|
||||
/// @}
|
||||
|
||||
@@ -60,8 +60,25 @@ JitConstants PoolingKernelBase::GetJitConstants(const pooling_params& pp, Poolin
|
||||
MakeJitConstant(toString(pp.divMode) + "_KERNEL_DIVIDER", 1),
|
||||
});
|
||||
|
||||
if (pp.maxPoolOpset8Features) {
|
||||
mem_consts.AddConstants({MakeJitConstant("DILATION", pp.poolDilation)});
|
||||
|
||||
if (pp.poolAxis != 0) {
|
||||
size_t indices_upper_bound = 1;
|
||||
const auto& dims = pp.inputs[0].GetDims();
|
||||
for (auto d = dims.crbegin() + pp.poolAxis; d != dims.crend(); ++d) {
|
||||
indices_upper_bound *= d->v;
|
||||
}
|
||||
if (indices_upper_bound != 0 && indices_upper_bound != 1) {
|
||||
mem_consts.AddConstants({MakeJitConstant("INDICES_UPPER_BOUND", indices_upper_bound)});
|
||||
}
|
||||
}
|
||||
|
||||
mem_consts.Merge(MakeTypeJitConstants(pp.poolIndexElementType, "SELECTED_INDICES"));
|
||||
}
|
||||
|
||||
if (dispatchData.needsBoundary) {
|
||||
mem_consts.AddConstant(MakeJitConstant("CHECK_BOUNDRY", 1));
|
||||
mem_consts.AddConstant(MakeJitConstant("CHECK_BOUNDARY", 1));
|
||||
}
|
||||
|
||||
if (EnableRound(pp)) {
|
||||
@@ -78,6 +95,8 @@ bool PoolingKernelBase::NeedsBoundaryCheck(const pooling_params& pp) const {
|
||||
|
||||
if (pp.poolPad.x != 0 || pp.poolPad.y != 0 || pp.poolPad.z != 0) {
|
||||
return true;
|
||||
} else if (pp.poolDilation.x > 1 || pp.poolDilation.y > 1 || pp.poolDilation.z > 1) {
|
||||
return true;
|
||||
} else if ((((input.X().v - pp.poolSize.x) / pp.poolStride.x) + 1) < output.X().v ||
|
||||
(((input.Y().v - pp.poolSize.y) / pp.poolStride.y) + 1) < output.Y().v ||
|
||||
(((input.Z().v - pp.poolSize.z) / pp.poolStride.z) + 1) < output.Z().v) {
|
||||
@@ -181,9 +200,13 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
|
||||
GetFusedPrimitiveInputsCount(params));
|
||||
uint32_t param_idx = 1;
|
||||
if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
|
||||
if (orgParams.maxPoolOpset8Features) {
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
}
|
||||
|
||||
return {kd};
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common_types.h"
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
@@ -21,6 +22,10 @@ struct pooling_params : public base_params {
|
||||
uSize poolSize;
|
||||
uSize poolStride;
|
||||
uSize poolPad;
|
||||
bool maxPoolOpset8Features = false;
|
||||
uSize poolDilation{1, 1, 1};
|
||||
Datatype poolIndexElementType = Datatype::INT64;
|
||||
int64_t poolAxis = 0;
|
||||
|
||||
ParamsKey GetParamsKey() const override {
|
||||
ParamsKey k = base_params::GetParamsKey();
|
||||
@@ -29,6 +34,11 @@ struct pooling_params : public base_params {
|
||||
k.EnablePoolRemainder(remainderAction);
|
||||
k.EnablePoolKernelDividerMode(divMode);
|
||||
|
||||
if (maxPoolOpset8Features) {
|
||||
k.EnablePoolDilation();
|
||||
k.EnablePoolIndicesOutput();
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -27,6 +27,8 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const {
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC);
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC_WITH_PADDING);
|
||||
k.EnableDifferentTypes();
|
||||
k.EnablePoolDilation();
|
||||
k.EnablePoolIndicesOutput();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ KERNEL(pooling_gpu_b_fs_yx_fsv4)(
|
||||
|
||||
ACCUMULATOR_TYPE result[4] = { INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL };
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
|
||||
{
|
||||
@@ -96,7 +96,7 @@ KERNEL(pooling_gpu_b_fs_yx_fsv4)(
|
||||
const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
|
||||
const uint num_elements = (hend - offset_y) * (wend - offset_x);
|
||||
#endif
|
||||
#else // !CHECK_BOUNDRY
|
||||
#else // !CHECK_BOUNDARY
|
||||
uint input_idx = GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, offset_y, offset_x);
|
||||
|
||||
for(uint j = 0; j < POOL_SIZE_Y; j++)
|
||||
|
||||
@@ -255,7 +255,7 @@ KERNEL(pooling_gpu_b_fs_zyx_fsv16)(
|
||||
ACCUMULATOR_TYPE result[FEATURE_SLICE_SIZE] = { INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL,
|
||||
INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL };
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
|
||||
offset_z + POOL_SIZE_Z < 0 || offset_z >= INPUT0_SIZE_Z)
|
||||
@@ -341,7 +341,7 @@ KERNEL(pooling_gpu_b_fs_zyx_fsv16)(
|
||||
const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
|
||||
const uint num_elements = (dend - offset_z) * (hend - offset_y) * (wend - offset_x);
|
||||
#endif
|
||||
#else // !CHECK_BOUNDRY
|
||||
#else // !CHECK_BOUNDARY
|
||||
#if INPUT0_DIMS == 4
|
||||
uint input_idx = INPUT0_GET_INDEX(b, f, offset_y, offset_x);
|
||||
#else
|
||||
|
||||
@@ -48,7 +48,7 @@ KERNEL(pooling_gpu_bs_fs_yx_bsv16_fsv16)(const __global INPUT0_TYPE* input,
|
||||
const uint input_fs_pitch = input_y_pitch * (INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y);
|
||||
int16 result = INIT_VAL;
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
uint batch_and_feature_offset = GET_DATA_BS_FS_YX_BSV16_FSV16_INDEX(INPUT0, b, f, 0, 0);
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X || offset_y + POOL_SIZE_Y < 0 ||
|
||||
offset_y >= INPUT0_SIZE_Y) {
|
||||
@@ -88,7 +88,7 @@ KERNEL(pooling_gpu_bs_fs_yx_bsv16_fsv16)(const __global INPUT0_TYPE* input,
|
||||
const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
|
||||
const uint num_elements = (hend - offset_y) * (wend - offset_x);
|
||||
#endif
|
||||
#else // !CHECK_BOUNDRY
|
||||
#else // !CHECK_BOUNDARY
|
||||
uint input_idx = GET_DATA_BS_FS_YX_BSV16_FSV16_INDEX(INPUT0, b, f, offset_y, offset_x);
|
||||
__attribute__((opencl_unroll_hint(POOL_SIZE_Y)))
|
||||
for (uint j = 0; j < POOL_SIZE_Y; j++) {
|
||||
|
||||
@@ -53,7 +53,7 @@ KERNEL(pooling_gpu_byxf_opt)(
|
||||
const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X;
|
||||
const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y;
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
|
||||
{
|
||||
|
||||
@@ -84,7 +84,7 @@ KERNEL(pooling_gpu_fs_b_yx_fsv32)(
|
||||
const size_t fs_offset = fs * fs_pitch; // locate beginning of feature tile
|
||||
const size_t b_offset = b * b_pitch; // locate beginning of batch
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
|
||||
{
|
||||
@@ -121,7 +121,7 @@ KERNEL(pooling_gpu_fs_b_yx_fsv32)(
|
||||
const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
|
||||
const uint num_elements = (hend - offset_y) * (wend - offset_x);
|
||||
#endif
|
||||
#else // !CHECK_BOUNDRY
|
||||
#else // !CHECK_BOUNDARY
|
||||
for(uint in_dy = 0; in_dy < POOL_SIZE_Y; in_dy++)
|
||||
{
|
||||
const size_t input_offset_y = (offset_y + in_dy) * y_pitch;
|
||||
|
||||
@@ -92,7 +92,7 @@ KERNEL(pooling_gpu_int8_ref)(
|
||||
|
||||
ACCUMULATOR_TYPE result = INIT_VAL;
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
|
||||
offset_z + POOL_SIZE_Z < 0 || offset_z >= INPUT0_SIZE_Z)
|
||||
@@ -155,7 +155,7 @@ KERNEL(pooling_gpu_int8_ref)(
|
||||
|
||||
#endif // DYNAMIC_WITH_PADDING_KERNEL_DIVIDER
|
||||
|
||||
#else // CHECK_BOUNDRY
|
||||
#else // CHECK_BOUNDARY
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
for(uint l = 0; l < POOL_SIZE_Z; l++)
|
||||
@@ -179,7 +179,7 @@ KERNEL(pooling_gpu_int8_ref)(
|
||||
const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y*POOL_SIZE_Z;
|
||||
#endif
|
||||
|
||||
#endif // CHECK_BOUNDRY
|
||||
#endif // CHECK_BOUNDARY
|
||||
|
||||
#if defined AVG_POOLING
|
||||
#if ENABLE_ROUND
|
||||
|
||||
@@ -28,6 +28,9 @@ KERNEL(pooling_gpu)(
|
||||
#if MAX_WITH_ARGMAX_POOLING
|
||||
, __global float* arg_max
|
||||
#endif
|
||||
#ifdef SELECTED_INDICES_TYPE
|
||||
, __global SELECTED_INDICES_TYPE* indices
|
||||
#endif
|
||||
#if HAS_FUSED_OPS_DECLS
|
||||
, FUSED_OPS_DECLS
|
||||
#endif
|
||||
@@ -91,11 +94,15 @@ KERNEL(pooling_gpu)(
|
||||
|
||||
ACCUMULATOR_TYPE result = INIT_VAL;
|
||||
|
||||
#ifdef SELECTED_INDICES_TYPE
|
||||
uint result_idx = 0;
|
||||
#endif
|
||||
|
||||
#if MAX_WITH_ARGMAX_POOLING
|
||||
uint arg_max_idx = 0;
|
||||
#endif
|
||||
|
||||
#ifdef CHECK_BOUNDRY
|
||||
#ifdef CHECK_BOUNDARY
|
||||
if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
|
||||
offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
|
||||
offset_z + POOL_SIZE_Z < 0 || offset_z >= INPUT0_SIZE_Z)
|
||||
@@ -107,6 +114,16 @@ KERNEL(pooling_gpu)(
|
||||
uint num_elementes = 0;
|
||||
#endif
|
||||
|
||||
#ifndef DILATION_SIZE_X
|
||||
#define DILATION_SIZE_X 1
|
||||
#endif
|
||||
#ifndef DILATION_SIZE_Y
|
||||
#define DILATION_SIZE_Y 1
|
||||
#endif
|
||||
#ifndef DILATION_SIZE_Z
|
||||
#define DILATION_SIZE_Z 1
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const uint batch_and_feature_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0);
|
||||
#else
|
||||
@@ -116,20 +133,20 @@ KERNEL(pooling_gpu)(
|
||||
#if OUTPUT_DIMS == 5
|
||||
for(uint l = 0; l < POOL_SIZE_Z; l++)
|
||||
{
|
||||
int input_offset_z = offset_z + l;
|
||||
int input_offset_z = offset_z + (l * DILATION_SIZE_Z);
|
||||
bool zero_z = input_offset_z >= INPUT0_SIZE_Z || input_offset_z < 0;
|
||||
if (!zero_z)
|
||||
{
|
||||
#endif
|
||||
for(uint j = 0; j < POOL_SIZE_Y; j++)
|
||||
{
|
||||
int input_offset_y = offset_y + j;
|
||||
int input_offset_y = offset_y + (j * DILATION_SIZE_Y);
|
||||
bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
|
||||
if(!zero_y)
|
||||
{
|
||||
for(uint i = 0; i < POOL_SIZE_X; i++)
|
||||
{
|
||||
int input_offset_x = offset_x + i;
|
||||
int input_offset_x = offset_x + (i * DILATION_SIZE_X);
|
||||
bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
|
||||
if(!zero)
|
||||
{
|
||||
@@ -159,7 +176,17 @@ KERNEL(pooling_gpu)(
|
||||
arg_max_idx = input_idx_bfyx_no_padding;
|
||||
}
|
||||
#endif
|
||||
// Convert the current input element once; it is used by both variants below.
const ACCUMULATOR_TYPE casted_input = TO_ACCUMULATOR_TYPE(input[input_idx]);
#ifdef SELECTED_INDICES_TYPE
    // Indices output requested: track the maximum by hand so the index of the winning
    // element can be stored next to its value. No unconditional apply_pooling update may
    // run before this compare, otherwise `casted_input > result` could never be true
    // and result_idx would never be written.
    if (casted_input > result)
    {
        result = casted_input;
        result_idx = input_idx;
    }
#else
    result = FUNC_CALL(apply_pooling)(result, casted_input);
#endif
|
||||
|
||||
|
||||
#ifdef DYNAMIC_KERNEL_DIVIDER
|
||||
num_elementes++;
|
||||
@@ -185,7 +212,7 @@ KERNEL(pooling_gpu)(
|
||||
|
||||
#endif // DYNAMIC_WITH_PADDING_KERNEL_DIVIDER
|
||||
|
||||
#else // CHECK_BOUNDRY
|
||||
#else // CHECK_BOUNDARY
|
||||
|
||||
#if OUTPUT_DIMS == 5 // 3D
|
||||
uint input_idx = INPUT0_GET_INDEX(b, f, offset_z, offset_y, offset_x);
|
||||
@@ -227,7 +254,16 @@ KERNEL(pooling_gpu)(
|
||||
uint input_idx = INPUT0_GET_INDEX(b, f, offset_y + j, offset_x + i);
|
||||
result = FUNC_CALL(apply_pooling)(result, TO_ACCUMULATOR_TYPE(input[input_idx]));
|
||||
#else
|
||||
#ifdef SELECTED_INDICES_TYPE
    // Indices output requested: track the maximum by hand so the index of the winning
    // element can be stored next to its value. The declaration needs an explicit type,
    // and the value is converted to the accumulator type so it compares like `result`.
    // No unconditional apply_pooling update may precede this compare, otherwise it
    // could never be true.
    const ACCUMULATOR_TYPE current_input_value = TO_ACCUMULATOR_TYPE(input[input_idx]);
    if (current_input_value > result)
    {
        result = current_input_value;
        result_idx = input_idx;
    }
#else
    result = FUNC_CALL(apply_pooling)(result, TO_ACCUMULATOR_TYPE(input[input_idx]));
#endif
|
||||
input_idx += INPUT0_X_PITCH;
|
||||
#endif
|
||||
#endif
|
||||
@@ -253,7 +289,7 @@ KERNEL(pooling_gpu)(
|
||||
const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y*POOL_SIZE_Z;
|
||||
#endif
|
||||
|
||||
#endif // CHECK_BOUNDRY
|
||||
#endif // CHECK_BOUNDARY
|
||||
|
||||
#if defined AVG_POOLING
|
||||
#if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
|
||||
@@ -280,6 +316,13 @@ KERNEL(pooling_gpu)(
|
||||
#endif
|
||||
output[output_pos] = final_result;
|
||||
|
||||
#ifdef SELECTED_INDICES_TYPE
|
||||
#ifdef INDICES_UPPER_BOUND
|
||||
result_idx %= INDICES_UPPER_BOUND;
|
||||
#endif
|
||||
indices[output_pos] = TO_SELECTED_INDICES_TYPE(result_idx);
|
||||
#endif
|
||||
|
||||
#if MAX_WITH_ARGMAX_POOLING
|
||||
//INPUT1 macro stands for Argmax
|
||||
const uint arg_max_pos = GET_DATA_INDEX_5D(INPUT1, b, f, z, y, x);
|
||||
@@ -288,3 +331,7 @@ KERNEL(pooling_gpu)(
|
||||
}
|
||||
|
||||
#undef INIT_VAL
|
||||
|
||||
#undef DILATION_SIZE_X
|
||||
#undef DILATION_SIZE_Y
|
||||
#undef DILATION_SIZE_Z
|
||||
|
||||
@@ -124,6 +124,8 @@ public:
|
||||
uint32_t dynamicKenrelDivider : 1;
|
||||
uint32_t dynamicKenrelDividerWithPadding : 1;
|
||||
uint32_t position_sensitive : 1;
|
||||
uint32_t dilation : 1;
|
||||
uint32_t indices_output : 1;
|
||||
} pooling;
|
||||
struct conv_t {
|
||||
uint32_t split : 1;
|
||||
@@ -281,6 +283,8 @@ public:
|
||||
void EnablePoolKernelDividerMode(KernelDividerMode m);
|
||||
void EnablePoolType(PoolType t);
|
||||
void EnablePoolRemainder(PoolRemainder r);
|
||||
// Sets the `dilation` bit in the pooling-specific (dedicated.pooling) part of the restrict key.
void EnablePoolDilation() { key.restrict.val.dedicated.pooling.dilation = 1; }
|
||||
// Sets the `indices_output` bit in the pooling-specific (dedicated.pooling) part of the restrict key.
void EnablePoolIndicesOutput() { key.restrict.val.dedicated.pooling.indices_output = 1; }
|
||||
void EnableQuantization(QuantizationType q);
|
||||
void EnablePositionSensitivePooling() { key.restrict.val.dedicated.pooling.position_sensitive = 1; }
|
||||
void EnableSplitSupport() { key.restrict.val.dedicated.conv.split = 1; }
|
||||
|
||||
@@ -89,6 +89,24 @@ public:
|
||||
get_default_optional_params<kernel_selector::pooling_optional_params>(arg.get_program());
|
||||
|
||||
const auto primitive = arg.get_primitive();
|
||||
|
||||
pool_params.maxPoolOpset8Features = primitive->maxPoolOpset8Features;
|
||||
if (pool_params.maxPoolOpset8Features) {
|
||||
switch (primitive->index_element_type) {
|
||||
case cldnn::data_types::i32: {
|
||||
pool_params.poolIndexElementType = kernel_selector::Datatype::INT32;
|
||||
break;
|
||||
}
|
||||
case cldnn::data_types::i64: {
|
||||
pool_params.poolIndexElementType = kernel_selector::Datatype::INT64;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw std::runtime_error{"Not supported index element type"};
|
||||
}
|
||||
pool_params.poolAxis = primitive->axis;
|
||||
}
|
||||
|
||||
const auto& stride = primitive->stride;
|
||||
const auto& pad = primitive->pad;
|
||||
const auto& input_sizes = arg.input().get_output_layout().size;
|
||||
@@ -134,6 +152,9 @@ public:
|
||||
|
||||
pp.poolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
|
||||
|
||||
const auto& dilation = primitive->dilation;
|
||||
pp.poolDilation = {(uint32_t)dilation.spatial[0], (uint32_t)dilation.spatial[1], (uint32_t)dilation.spatial[2]};
|
||||
|
||||
auto& kernel_selector = kernel_selector::pooling_kernel_selector::Instance();
|
||||
auto best_kernels = kernel_selector.GetBestKernels(pool_params, pool_optional_params);
|
||||
|
||||
|
||||
@@ -221,6 +221,7 @@ REGISTER_FACTORY(v8, NV12toBGR);
|
||||
REGISTER_FACTORY(v8, I420toRGB);
|
||||
REGISTER_FACTORY(v8, I420toBGR);
|
||||
REGISTER_FACTORY(v8, RandomUniform)
|
||||
REGISTER_FACTORY(v8, MaxPool);
|
||||
|
||||
// --------------------------- Supported internal ops --------------------------- //
|
||||
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "ngraph/op/max_pool.hpp"
|
||||
#include "ngraph/op/avg_pool.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/mutable_data.hpp"
|
||||
#include "intel_gpu/primitives/pooling.hpp"
|
||||
|
||||
namespace ov {
|
||||
@@ -17,6 +18,7 @@ namespace intel_gpu {
|
||||
struct PoolingParameters {
|
||||
cldnn::tensor kernel;
|
||||
cldnn::tensor stride;
|
||||
cldnn::tensor dilation;
|
||||
cldnn::tensor pad_begin;
|
||||
cldnn::tensor pad_end;
|
||||
};
|
||||
@@ -24,10 +26,15 @@ struct PoolingParameters {
|
||||
static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
|
||||
const ngraph::Strides& strides,
|
||||
const ngraph::Shape& pads_begin,
|
||||
const ngraph::Shape& pads_end) {
|
||||
const ngraph::Shape& pads_end,
|
||||
const ngraph::Strides& dilations = {}) {
|
||||
cldnn::tensor k, s, pb, pe;
|
||||
if (pads_begin.size() != strides.size() || pads_end.size() != strides.size() || kernel.size() != strides.size())
|
||||
IE_THROW() << "Strides, KernelSizes and Pads are supposed to have the same elements count";
|
||||
cldnn::tensor d{cldnn::batch(1), cldnn::feature(1), cldnn::spatial(1, 1, 1)};
|
||||
const auto is_dilation_specified = !dilations.empty();
|
||||
|
||||
if (pads_begin.size() != strides.size() || pads_end.size() != strides.size() || kernel.size() != strides.size()
|
||||
|| (is_dilation_specified && dilations.size() != strides.size()))
|
||||
IE_THROW() << "Strides, KernelSizes, Pads (and Dilations, if specified) are supposed to have the same elements count";
|
||||
|
||||
std::vector<cldnn::tensor::value_type> pb_casted(pads_begin.begin(), pads_begin.end());
|
||||
std::vector<cldnn::tensor::value_type> pe_casted(pads_end.begin(), pads_end.end());
|
||||
@@ -37,6 +44,9 @@ static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[2], pb_casted[1], pb_casted[0]));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[2], pe_casted[1], pe_casted[0]));
|
||||
if (is_dilation_specified) {
|
||||
d = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[2], dilations[1], dilations[0]));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
@@ -44,6 +54,9 @@ static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[1], pb_casted[0], 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[1], pe_casted[0], 0));
|
||||
if (is_dilation_specified) {
|
||||
d = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[1], dilations[0], 1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
@@ -51,12 +64,15 @@ static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[0], 0, 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[0], 0, 0));
|
||||
if (is_dilation_specified) {
|
||||
d = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[0], 1, 1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: IE_THROW() << "Unsupported pooling parameters size. Only 1d, 2d, and 3d cases are supported";
|
||||
}
|
||||
|
||||
return {k, s, pb, pe};
|
||||
return {k, s, d, pb, pe};
|
||||
}
|
||||
|
||||
static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::AvgPool>& op) {
|
||||
@@ -99,7 +115,60 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
|
||||
/// Translates an opset8 MaxPool node into cldnn primitives.
///
/// Three primitives are added to the program, in this order:
///   1. "<layer>_md_write" - a mutable_data over a newly allocated buffer whose layout is
///                           derived from the precision and shape of the node's output 1;
///   2. "<layer>.0"        - the pooling primitive, given the node's first input and the
///                           mutable_data above as its indices output;
///   3. "<layer>.1"        - a second mutable_data over the same buffer that lists the
///                           pooling primitive as its input.
/// Input count is validated first, and IE_THROW is raised unless the node has exactly two outputs.
static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v8::MaxPool>& op) {
    p.ValidateInputs(op, {1});
    if (op->get_output_size() != 2) {
        IE_THROW() << "MaxPool opset 8 requires 2 outputs";
    }

    auto input_ids = p.GetInputPrimitiveIDs(op);
    const auto base_name = layer_type_name_ID(op);
    const auto pooling_id = base_name + ".0";

    // Memory backing the indices output (output port 1 of the node).
    const auto indices_precision = op->get_output_element_type(1);
    const auto indices_shape = op->get_output_shape(1);
    cldnn::layout indices_layout(DataTypeFromPrecision(indices_precision),
                                 DefaultFormatForDims(indices_shape.size()),
                                 tensor_from_dims(indices_shape));
    const auto indices_memory = p.GetEngine().allocate_memory(indices_layout);

    // mutable_data handed to the pooling primitive as its indices output.
    const cldnn::primitive_id indices_write_id = base_name + "_md_write";
    const auto friendly_name = op->get_friendly_name();
    const cldnn::mutable_data indices_write_prim(indices_write_id, indices_memory, friendly_name);
    p.primitiveIDs[indices_write_id] = indices_write_id;
    p.AddPrimitive(indices_write_prim);
    input_ids.push_back(indices_write_id);

    // The pooling primitive itself: first element is the data input, last is the indices buffer.
    const auto pool_params = GetPoolingParameters(op->get_kernel(),
                                                  op->get_strides(),
                                                  op->get_pads_begin(),
                                                  op->get_pads_end(),
                                                  op->get_dilations());
    cldnn::pooling pool_prim(pooling_id,
                             input_ids[0],
                             input_ids.back(),
                             pool_params.kernel,
                             pool_params.stride,
                             pool_params.dilation,
                             pool_params.pad_begin,
                             pool_params.pad_end,
                             op->get_axis(),
                             DataTypeFromPrecision(op->get_index_element_type()),
                             tensor_from_dims(op->get_output_shape(0)),
                             DataTypeFromPrecision(op->get_output_element_type(0)),
                             friendly_name);
    p.AddPrimitive(pool_prim);

    // Second view of the same buffer, registered under the ".1" id and depending on the pooling primitive.
    const cldnn::primitive_id indices_read_id = base_name + ".1";
    const cldnn::mutable_data indices_read_prim(indices_read_id,
                                                { pooling_id },
                                                indices_memory,
                                                friendly_name);
    p.primitiveIDs[indices_read_id] = indices_read_id;
    p.AddPrimitive(indices_read_prim);

    p.AddPrimitiveToProfiler(pool_prim, op);
}
|
||||
|
||||
|
||||
REGISTER_FACTORY_IMPL(v1, MaxPool);
|
||||
REGISTER_FACTORY_IMPL(v8, MaxPool);
|
||||
REGISTER_FACTORY_IMPL(v1, AvgPool);
|
||||
|
||||
} // namespace intel_gpu
|
||||
|
||||
@@ -363,6 +363,8 @@ const auto maxPoolv8_ExplicitPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(dilation),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
@@ -386,6 +388,8 @@ const auto maxPoolv8_SameUpperPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(dilation),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::SAME_UPPER)
|
||||
);
|
||||
@@ -409,6 +413,8 @@ const auto maxPoolv8_SameLowerPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(dilation),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::SAME_LOWER)
|
||||
);
|
||||
@@ -432,6 +438,8 @@ const auto maxPoolv8_ExplicitPad_FloorRounding_5Dinput_Params = ::testing::Combi
|
||||
::testing::Values(dilation3D[0]),
|
||||
::testing::ValuesIn(padBegins3D),
|
||||
::testing::ValuesIn(padEnds3D),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
@@ -455,6 +463,8 @@ const auto maxPoolv8_SameUpperPad_FloorRounding_5Dinput_Params = ::testing::Comb
|
||||
::testing::ValuesIn(dilation3D),
|
||||
::testing::ValuesIn(padBegins3D),
|
||||
::testing::ValuesIn(padEnds3D),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::SAME_UPPER)
|
||||
);
|
||||
@@ -478,6 +488,8 @@ const auto maxPoolv8_SameLowerPad_CeilRounding_5Dinput_Params = ::testing::Combi
|
||||
::testing::ValuesIn(dilation3D),
|
||||
::testing::ValuesIn(padBegins3D),
|
||||
::testing::ValuesIn(padEnds3D),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::CEIL),
|
||||
::testing::Values(ngraph::op::PadType::SAME_LOWER)
|
||||
);
|
||||
@@ -501,6 +513,8 @@ const auto maxPoolv8_ExplicitPad_CeilRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(dilation),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::CEIL),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
@@ -549,6 +563,8 @@ const auto maxPoolv8_ValidPad_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(dilation),
|
||||
::testing::Values(std::vector<size_t>({0, 0})),
|
||||
::testing::Values(std::vector<size_t>({0, 0})),
|
||||
::testing::Values(ngraph::element::Type_t::i32),
|
||||
::testing::Values(0),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR), // placeholder value - Rounding Type not applicable for Valid pad type
|
||||
::testing::Values(ngraph::op::PadType::VALID)
|
||||
);
|
||||
|
||||
@@ -144,7 +144,9 @@ public:
|
||||
std::vector<size_t> padBegin, padEnd;
|
||||
ngraph::op::PadType padType;
|
||||
ngraph::op::RoundingType roundingType;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, roundingType, padType) = basicParamsSet;
|
||||
ngraph::element::Type indexElementType;
|
||||
int64_t axis;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = basicParamsSet;
|
||||
|
||||
std::ostringstream results;
|
||||
results << "IS=(";
|
||||
@@ -181,7 +183,9 @@ protected:
|
||||
std::vector<size_t> padBegin, padEnd;
|
||||
ngraph::op::PadType padType;
|
||||
ngraph::op::RoundingType roundingType;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, roundingType, padType) = basicParamsSet;
|
||||
ngraph::element::Type indexElementType;
|
||||
int64_t axis;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = basicParamsSet;
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
if (selectedType.empty()) {
|
||||
selectedType = getPrimitiveType();
|
||||
@@ -192,7 +196,8 @@ protected:
|
||||
|
||||
auto params = ngraph::builder::makeDynamicParams(inPrc, inputDynamicShapes);
|
||||
std::shared_ptr<ngraph::Node> pooling = ngraph::builder::makeMaxPoolingV8(params[0], stride, dilation, padBegin, padEnd,
|
||||
kernel, roundingType, padType);
|
||||
kernel, roundingType, padType,
|
||||
indexElementType, axis);
|
||||
pooling->get_rt_info() = getCPUInfo();
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(pooling->output(0))};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "MaxPooling");
|
||||
@@ -375,15 +380,19 @@ const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax4D = {
|
||||
|
||||
const std::vector<LayerTestsDefinitions::maxPoolV8SpecificParams> paramsMaxV84D = {
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER },
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::maxPoolV8SpecificParams> paramsMaxV84D_ref = {
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {2, 2}, {0, 0}, {0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER },
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 2}, {1, 2}, {0, 0}, {0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT },
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {4, 2}, {2, 1}, {2, 2}, {0, 0}, {0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT },
|
||||
};
|
||||
|
||||
@@ -467,15 +476,19 @@ const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax5D = {
|
||||
|
||||
const std::vector<LayerTestsDefinitions::maxPoolV8SpecificParams> paramsMaxV85D = {
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER },
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::maxPoolV8SpecificParams> paramsMaxV85D_ref = {
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER },
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 2, 2}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT },
|
||||
LayerTestsDefinitions::maxPoolV8SpecificParams{ {2, 3, 4}, {2, 2, 2}, {2, 1, 1}, {1, 1, 1}, {1, 2, 2},
|
||||
ngraph::element::Type_t::i32, 0,
|
||||
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT },
|
||||
};
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
using namespace ngraph::helpers;
|
||||
using namespace LayerTestsDefinitions;
|
||||
using namespace ngraph::element;
|
||||
|
||||
namespace {
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
@@ -20,14 +21,20 @@ const std::vector<std::vector<size_t >> kernels = {{3, 3},
|
||||
{3, 5}};
|
||||
const std::vector<std::vector<size_t >> strides = {{1, 1},
|
||||
{1, 2}};
|
||||
const std::vector<std::vector<size_t >> dilations = {{1, 1},
|
||||
{1, 2}};
|
||||
const std::vector<std::vector<size_t >> padBegins = {{0, 0},
|
||||
{0, 2}};
|
||||
const std::vector<std::vector<size_t >> padEnds = {{0, 0},
|
||||
{0, 2}};
|
||||
const std::vector<ngraph::op::RoundingType> roundingTypes = {ngraph::op::RoundingType::CEIL,
|
||||
ngraph::op::RoundingType::FLOOR};
|
||||
const std::vector<ngraph::element::Type_t> indexElementTypes = {ngraph::element::Type_t::i32};
|
||||
const std::vector<int64_t> axes = {0, 2};
|
||||
const std::vector<size_t > inputShapeSmall = {1, 3, 30, 30};
|
||||
const std::vector<size_t > inputShapeLarge = {1, 3, 50, 50};
|
||||
|
||||
////* ========== Max Polling ========== */
|
||||
////* ========== Max Pooling ========== */
|
||||
/* +========== Explicit Pad Floor Rounding ========== */
|
||||
const auto maxPool_ExplicitPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::Values(PoolingTypes::MAX),
|
||||
@@ -40,7 +47,7 @@ const auto maxPool_ExplicitPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRpunding, PoolingLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRounding, PoolingLayerTest,
|
||||
::testing::Combine(
|
||||
maxPool_ExplicitPad_FloorRounding_Params,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -48,7 +55,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_FloorRpunding, PoolingLayerTe
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 50, 50})),
|
||||
::testing::Values(inputShapeLarge),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
PoolingLayerTest::getTestCaseName);
|
||||
|
||||
@@ -65,7 +72,7 @@ const auto maxPool_ExplicitPad_CeilRounding_Params = ::testing::Combine(
|
||||
::testing::Values(false) // placeholder value - exclude pad not applicable for max pooling
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRpunding, PoolingLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRounding, PoolingLayerTest,
|
||||
::testing::Combine(
|
||||
maxPool_ExplicitPad_CeilRounding_Params,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -73,7 +80,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_ExplicitPad_CeilRpunding, PoolingLayerTes
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 50, 50})),
|
||||
::testing::Values(inputShapeLarge),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
PoolingLayerTest::getTestCaseName);
|
||||
|
||||
@@ -100,7 +107,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_CeilRounding, PoolingLayerTes
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
|
||||
::testing::Values(inputShapeSmall),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
PoolingLayerTest::getTestCaseName);
|
||||
|
||||
@@ -125,11 +132,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_ExplicitPad_FloorRounding, PoolingLayerTe
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
|
||||
::testing::Values(inputShapeSmall),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
PoolingLayerTest::getTestCaseName);
|
||||
|
||||
////* ========== Avg and Max Polling Cases ========== */
|
||||
////* ========== Avg and Max Pooling Cases ========== */
|
||||
/* ========== Valid Pad Rounding Not Applicable ========== */
|
||||
const auto allPools_ValidPad_Params = ::testing::Combine(
|
||||
::testing::Values(PoolingTypes::MAX, PoolingTypes::AVG),
|
||||
@@ -151,7 +158,61 @@ INSTANTIATE_TEST_SUITE_P(smoke_MAX_and_AVGPool_ValidPad, PoolingLayerTest,
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 50, 50})),
|
||||
::testing::Values(inputShapeLarge),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
PoolingLayerTest::getTestCaseName);
|
||||
|
||||
|
||||
|
||||
////* ========== MaxPool v8 ========== */
|
||||
///* +========== Explicit Pad Floor Rounding ========== */
|
||||
const auto maxPool8_ExplicitPad_FloorRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels),
|
||||
::testing::ValuesIn(strides),
|
||||
::testing::ValuesIn(dilations),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::ValuesIn(indexElementTypes),
|
||||
::testing::ValuesIn(axes),
|
||||
::testing::Values(ngraph::op::RoundingType::FLOOR),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool8_ExplicitPad_FloorRounding, MaxPoolingV8LayerTest,
|
||||
::testing::Combine(
|
||||
maxPool8_ExplicitPad_FloorRounding_Params,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(inputShapeSmall),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MaxPoolingV8LayerTest::getTestCaseName);
|
||||
|
||||
/* ========== Explicit Pad Ceil Rounding ========== */
|
||||
const auto maxPool8_ExplicitPad_CeilRounding_Params = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels),
|
||||
::testing::Values(std::vector<size_t>({1, 1})),
|
||||
::testing::ValuesIn(dilations),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::ValuesIn(indexElementTypes),
|
||||
::testing::ValuesIn(axes),
|
||||
::testing::Values(ngraph::op::RoundingType::CEIL),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool8_ExplicitPad_CeilRounding, MaxPoolingV8LayerTest,
|
||||
::testing::Combine(
|
||||
maxPool8_ExplicitPad_CeilRounding_Params,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(inputShapeSmall),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MaxPoolingV8LayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -54,6 +54,8 @@ typedef std::tuple<
|
||||
std::vector<size_t>, // Dilation
|
||||
std::vector<size_t>, // Pad begin
|
||||
std::vector<size_t>, // Pad end
|
||||
ngraph::element::Type_t, // Index element type
|
||||
int64_t, // Axis
|
||||
ngraph::op::RoundingType, // Rounding type
|
||||
ngraph::op::PadType // Pad type
|
||||
> maxPoolV8SpecificParams;
|
||||
|
||||
@@ -106,7 +106,9 @@ std::string MaxPoolingV8LayerTest::getTestCaseName(const testing::TestParamInfo<
|
||||
std::vector<size_t> padBegin, padEnd;
|
||||
ngraph::op::PadType padType;
|
||||
ngraph::op::RoundingType roundingType;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, roundingType, padType) = poolParams;
|
||||
ngraph::element::Type indexElementType;
|
||||
int64_t axis;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = poolParams;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||
@@ -115,6 +117,8 @@ std::string MaxPoolingV8LayerTest::getTestCaseName(const testing::TestParamInfo<
|
||||
result << "D" << CommonTestUtils::vec2str(dilation) << "_";
|
||||
result << "PB" << CommonTestUtils::vec2str(padBegin) << "_";
|
||||
result << "PE" << CommonTestUtils::vec2str(padEnd) << "_";
|
||||
result << "IET" << indexElementType << "_";
|
||||
result << "A" << axis << "_";
|
||||
result << "Rounding=" << roundingType << "_";
|
||||
result << "AutoPad=" << padType << "_";
|
||||
result << "netPRC=" << netPrecision.name() << "_";
|
||||
@@ -201,7 +205,9 @@ void MaxPoolingV8LayerTest::SetUp() {
|
||||
std::vector<size_t> padBegin, padEnd;
|
||||
ngraph::op::PadType padType;
|
||||
ngraph::op::RoundingType roundingType;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, roundingType, padType) = poolParams;
|
||||
ngraph::element::Type indexElementType;
|
||||
int64_t axis;
|
||||
std::tie(kernel, stride, dilation, padBegin, padEnd, indexElementType, axis, roundingType, padType) = poolParams;
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
@@ -209,9 +215,17 @@ void MaxPoolingV8LayerTest::SetUp() {
|
||||
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
|
||||
std::shared_ptr<ngraph::Node> maxPool = ngraph::builder::makeMaxPoolingV8(paramOuts[0], stride, dilation, padBegin, padEnd,
|
||||
kernel, roundingType, padType);
|
||||
kernel, roundingType, padType,
|
||||
indexElementType, axis);
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(maxPool->output(0))};
|
||||
const auto maxPoolV8_second_output_is_supported = targetDevice == CommonTestUtils::DEVICE_GPU;
|
||||
ngraph::ResultVector results;
|
||||
if (maxPoolV8_second_output_is_supported) {
|
||||
results = {std::make_shared<ngraph::opset3::Result>(maxPool->output(0)),
|
||||
std::make_shared<ngraph::opset3::Result>(maxPool->output(1))};
|
||||
} else {
|
||||
results = { std::make_shared<ngraph::opset3::Result>(maxPool->output(0)) };
|
||||
}
|
||||
function = std::make_shared<ngraph::Function>(results, params, "MaxPoolV8");
|
||||
}
|
||||
|
||||
|
||||
@@ -435,7 +435,9 @@ std::shared_ptr<Node> makeMaxPoolingV8(const ngraph::Output<Node> &in,
|
||||
const std::vector<size_t> &padsEnd,
|
||||
const std::vector<size_t> &kernel,
|
||||
const op::RoundingType &roundingType,
|
||||
const op::PadType &padType);
|
||||
const op::PadType &padType,
|
||||
const ov::element::Type &indexElementType,
|
||||
const int64_t axis);
|
||||
|
||||
std::shared_ptr<Node> makeROIPooling(const Output<Node>& input,
|
||||
const Output<Node>& coords,
|
||||
|
||||
@@ -42,9 +42,12 @@ std::shared_ptr<Node> makeMaxPoolingV8(const ngraph::Output<Node> &in,
|
||||
const std::vector<size_t> &padsEnd,
|
||||
const std::vector<size_t> &kernel,
|
||||
const op::RoundingType &roundingType,
|
||||
const op::PadType &padType) {
|
||||
const op::PadType &padType,
|
||||
const ov::element::Type &indexElementType,
|
||||
const int64_t axis) {
|
||||
std::shared_ptr<ngraph::Node> pooling = std::make_shared<ngraph::opset8::MaxPool>(in, strides, dilation, padsBegin, padsEnd,
|
||||
kernel, roundingType, padType);
|
||||
kernel, roundingType, padType,
|
||||
indexElementType, axis);
|
||||
return pooling;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user