[GPU] Added 5D support for Interpolate linear_onnx mode and tests (#12821)

OlehKravchyshyn 2022-10-05 11:20:51 +03:00 committed by GitHub
parent 29a15e39da
commit b73d3370d8
7 changed files with 395 additions and 67 deletions
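
In short: the OCL resample implementation is now registered for the 5-D (bfzyx and blocked zyx) formats, the reference kernel gains a trilinear OUTPUT_DIMS == 5 branch for linear_onnx, dispatch and validation accept that case, the plugin-side axis checks allow {0, 1, 2}, {2, 3, 4} or all five axes, and unit plus functional tests cover the new path. Below is a minimal sketch, assuming the public OpenVINO 2022 C++ API (ov::op::v4::Interpolate, ov::Model), of the kind of 5-D linear_onnx Interpolate this change targets; it is illustrative, not code from the commit, so verify headers and signatures against your OpenVINO version.

// Sketch only: a 5-D input resized with mode = linear_onnx over the spatial axes {2, 3, 4}.
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/interpolate.hpp>
#include <openvino/op/parameter.hpp>

std::shared_ptr<ov::Model> make_linear_onnx_5d_model() {
    using Interp = ov::op::v4::Interpolate;

    auto image  = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 1, 2, 2, 2});
    auto sizes  = ov::op::v0::Constant::create(ov::element::i64, {3}, {4, 4, 4});         // target D, H, W
    auto scales = ov::op::v0::Constant::create(ov::element::f32, {3}, {2.0f, 2.0f, 2.0f});
    auto axes   = ov::op::v0::Constant::create(ov::element::i64, {3}, {2, 3, 4});         // spatial axes only

    Interp::InterpolateAttrs attrs;
    attrs.mode = Interp::InterpolateMode::LINEAR_ONNX;             // the mode this commit extends to 5-D on GPU
    attrs.shape_calculation_mode = Interp::ShapeCalcMode::SCALES;
    attrs.coordinate_transformation_mode = Interp::CoordinateTransformMode::HALF_PIXEL;
    attrs.nearest_mode = Interp::NearestMode::ROUND_PREFER_FLOOR;  // not used by linear_onnx
    attrs.pads_begin = {0, 0, 0, 0, 0};
    attrs.pads_end   = {0, 0, 0, 0, 0};

    auto interp = std::make_shared<Interp>(image, sizes, scales, axes, attrs);
    return std::make_shared<ov::Model>(ov::OutputVector{interp}, ov::ParameterVector{image});
}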

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <set>
#include "resample_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
@ -169,55 +171,36 @@ struct resample_impl : typed_primitive_impl_ocl<resample> {
namespace detail {
attach_resample_impl::attach_resample_impl() {
- implementation_map<resample>::add(impl_types::ocl, resample_impl::create, {
-     std::make_tuple(data_types::f32, format::yxfb),
-     std::make_tuple(data_types::f16, format::yxfb),
-     std::make_tuple(data_types::f32, format::byxf),
-     std::make_tuple(data_types::f16, format::byxf),
-     std::make_tuple(data_types::f32, format::bfyx),
-     std::make_tuple(data_types::f16, format::bfyx),
-     std::make_tuple(data_types::u8, format::bfyx),
-     std::make_tuple(data_types::i8, format::bfyx),
-     std::make_tuple(data_types::f32, format::bfzyx),
-     std::make_tuple(data_types::f16, format::bfzyx),
-     std::make_tuple(data_types::u8, format::bfzyx),
-     std::make_tuple(data_types::i8, format::bfzyx),
-     std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
-     std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
-     std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
-     std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
-     std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
-     std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
-     std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
-     std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
-     std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
-     std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
-     std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
-     std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
-     std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
-     std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
-     std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
-     std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
-     std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
-     std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
-     std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
-     std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
-     std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
-     std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
-     std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
-     std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
-     std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
- });
+ std::set<implementation_map<resample>::key_type> keys;
+ const auto types = {data_types::f16, data_types::f32, data_types::i8, data_types::u8, data_types::i32};
+ const auto formats = {
+     format::bfyx,
+     format::b_fs_yx_fsv16,
+     format::b_fs_yx_fsv32,
+     format::bs_fs_yx_bsv16_fsv16,
+     format::bs_fs_yx_bsv32_fsv16,
+     format::bs_fs_yx_bsv32_fsv32,
+     format::bfzyx,
+     format::b_fs_zyx_fsv16,
+     format::b_fs_zyx_fsv32,
+     format::bs_fs_zyx_bsv16_fsv32,
+     format::bs_fs_zyx_bsv16_fsv16,
+     format::bs_fs_zyx_bsv32_fsv32,
+     format::bs_fs_zyx_bsv32_fsv16,
+ };
+ for (const auto type : types) {
+     for (const auto format : formats) {
+         keys.emplace(type, format);
+     }
+ }
+ keys.emplace(data_types::f32, format::yxfb);
+ keys.emplace(data_types::f16, format::yxfb);
+ keys.emplace(data_types::f16, format::fs_b_yx_fsv32);
+ implementation_map<resample>::add(impl_types::ocl, resample_impl::create, keys);
}
} // namespace detail

View File

@ -1430,7 +1430,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::adaptive_pooling::type_id() &&
prim.type() != cldnn::bucketize::type_id() &&
prim.type() != cldnn::roll::type_id() &&
prim.type() != cldnn::prior_box::type_id()) {
prim.type() != cldnn::prior_box::type_id() &&
prim.type() != cldnn::resample::type_id()) {
can_use_fsv16 = false;
}

View File

@ -60,7 +60,8 @@ ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_sel
dims_by_gws = {{ Tensor::DataChannelName::X },
{ Tensor::DataChannelName::Y, Tensor::DataChannelName::Z },
{ Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH }};
} else if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) {
} else if ( (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) &&
out.Dimentions() <= 4 ) {
dispatchData.gws = { Align(out.X().v, 32), out.Y().v, out.Batch().v };
dims_by_gws = {{ Tensor::DataChannelName::X },
{ Tensor::DataChannelName::Y },
@ -79,7 +80,8 @@ ResampleKernelBase::DispatchData ResampleKernelBase::SetDefault(const kernel_sel
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo, in_layout, out_layout, dims_by_gws);
if (arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) {
if ((arg.resampleType == ResampleType::BILINEAR_INTERP || arg.resampleType == ResampleType::LINEAR_ONNX) &&
out.Dimentions() <= 4) {
dispatchData.lws[0] = 32;
dispatchData.lws[1] = 1;
dispatchData.lws[2] = 1;
@ -108,7 +110,8 @@ bool ResampleKernelBase::Validate(const Params& p, const optional_params& o) con
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
params.resampleType != ResampleType::CAFFE_BILINEAR_INTERP &&
params.resampleType != ResampleType::BILINEAR_INTERP)
params.resampleType != ResampleType::BILINEAR_INTERP &&
params.resampleType != ResampleType::LINEAR_ONNX)
return false;
return true;
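
A rough sketch (my reading of the dispatch change above, not code from the repository) of how global work sizes are now chosen for linear_onnx: outputs of rank <= 4 keep the specialized x-aligned dispatch with a fixed 32x1x1 local size, while 5-D outputs fall through to the generic X / (Y*Z) / (FEATURE*BATCH) split. The Gws struct and helper below are hypothetical.

#include <cstddef>

struct Gws { std::size_t x, y, z; };  // hypothetical helper type for illustration

Gws pick_gws_linear_onnx(std::size_t b, std::size_t f, std::size_t z,
                         std::size_t y, std::size_t x, std::size_t out_rank) {
    if (out_rank <= 4) {
        // unchanged <= 4-D path: one work-item per output x (aligned to 32), y and batch;
        // features are walked inside the kernel, and lws is forced to {32, 1, 1}
        return { (x + 31) / 32 * 32, y, b };
    }
    // new 5-D path: generic split over X, Y*Z and FEATURE*BATCH
    return { x, y * z, f * b };
}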

View File

@ -46,6 +46,8 @@ inline int FUNC(get_nearest_val)(float num, bool is_downsample)
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
{
if (scale == 1.0f)
return num;
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
return (num + 0.5f) * scale - 0.5f;
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
@ -251,6 +253,8 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
#endif // HAS_FUSED_OPS
output[FUNC_CALL(get_output_index)(out_coords[0], out_coords[1], out_coords[2], out_coords[3], out_coords[4])] = res;
#elif defined(SAMPLE_TYPE_LINEAR_ONNX) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
#if OUTPUT_DIMS <= 4
const int ox = get_global_id(0);
const int oy = get_global_id(1);
const int feature = 0;
@ -286,21 +290,19 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
#endif
unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
INPUT0_TYPE top_left = tlOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x1)];
INPUT0_TYPE top_right = trOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x2)];
INPUT0_TYPE bottom_left = blOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x1)];
INPUT0_TYPE bottom_right = brOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x2)];
#else
unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
INPUT0_TYPE top_left = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x1)];
INPUT0_TYPE top_right = input[INPUT0_GET_INDEX(batch, in_f, in_y1, in_x2)];
INPUT0_TYPE bottom_left = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x1)];
INPUT0_TYPE bottom_right = input[INPUT0_GET_INDEX(batch, in_f, in_y2, in_x2)];
#if PADDING_USED == 1
if (tlOutOfBounds)
top_left = INPUT0_VAL_ZERO;
if (trOutOfBounds)
top_right = INPUT0_VAL_ZERO;
if (blOutOfBounds)
bottom_left = INPUT0_VAL_ZERO;
if (brOutOfBounds)
bottom_right = INPUT0_VAL_ZERO;
#endif
ACCUMULATOR_TYPE interp_val = TO_ACCUMULATOR_TYPE(dx2 * dy2 * top_left) +
@ -318,6 +320,96 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
#endif
output[OUTPUT_GET_INDEX(batch, in_f, oy, ox)] = res;
}
#endif // #if OUTPUT_DIMS <= 4
#if OUTPUT_DIMS == 5
const int ox = get_global_id(0);
const int oy = (int)get_global_id(1) % OUTPUT_SIZE_Y;
const int oz = (int)get_global_id(1) / OUTPUT_SIZE_Y;
const int feature = (int)get_global_id(2) % OUTPUT_FEATURE_NUM;
const int batch = (int)get_global_id(2) / OUTPUT_FEATURE_NUM;
const int PADDED_Z = in_size[2] + PADS_BEGIN[2] + PADS_END[2];
const int PADDED_Y = in_size[3] + PADS_BEGIN[3] + PADS_END[3];
const int PADDED_X = in_size[4] + PADS_BEGIN[4] + PADS_END[4];
const float ix = FUNC_CALL(get_original_coordinate)(ox, SCALES[4], out_size[4], PADDED_X);
const float iy = FUNC_CALL(get_original_coordinate)(oy, SCALES[3], out_size[3], PADDED_Y);
const float iz = FUNC_CALL(get_original_coordinate)(oz, SCALES[2], out_size[2], PADDED_Z);
float in_z = fmax(0, fmin(iz, PADDED_Z - 1));
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
int in_z1 = min((int)in_z, PADDED_Z - 1);
int in_z2 = min(in_z1 + 1, PADDED_Z - 1);
int in_y1 = min((int)in_y, PADDED_Y - 1);
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
int in_x1 = min((int)in_x, PADDED_X - 1);
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
const ACCUMULATOR_TYPE dz1 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z1)) : 0.5f;
const ACCUMULATOR_TYPE dz2 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z2)) : 0.5f;
#if PADDING_USED == 1
in_z1 -= PADS_BEGIN[2];
in_z2 -= PADS_BEGIN[2];
in_y1 -= PADS_BEGIN[3];
in_y2 -= PADS_BEGIN[3];
in_x1 -= PADS_BEGIN[4];
in_x2 -= PADS_BEGIN[4];
bool BackTopLOutOfBounds = in_z1 < 0 || in_z1 >= in_size[2] || in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0|| in_x1 >= in_size[4];
bool BackTopROutOfBounds = in_z1 < 0 || in_z1 >= in_size[2] || in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool BackBottomLOutOfBounds = in_z1 < 0 || in_z1 >= in_size[2] || in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool BackBottomROutOfBounds = in_z1 < 0 || in_z1 >= in_size[2] || in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool FrontTopLOutOfBounds = in_z2 < 0 || in_z2 >= in_size[2] || in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool FrontTopROutOfBounds = in_z2 < 0 || in_z2 >= in_size[2] || in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool FrontBottomLOutOfBounds = in_z2 < 0 || in_z2 >= in_size[2] || in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool FrontBottomROutOfBounds = in_z2 < 0 || in_z2 >= in_size[2] || in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
OUTPUT_TYPE x111 = BackTopLOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y1, in_x1)];
OUTPUT_TYPE x211 = BackTopROutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y1, in_x2)];
OUTPUT_TYPE x121 = BackBottomLOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y2, in_x1)];
OUTPUT_TYPE x221 = BackBottomROutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y2, in_x2)];
OUTPUT_TYPE x112 = FrontTopLOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y1, in_x1)];
OUTPUT_TYPE x212 = FrontTopROutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y1, in_x2)];
OUTPUT_TYPE x122 = FrontBottomLOutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y2, in_x1)];
OUTPUT_TYPE x222 = FrontBottomROutOfBounds ? INPUT0_VAL_ZERO : input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y2, in_x2)];
#else
OUTPUT_TYPE x111 = input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y1, in_x1)];
OUTPUT_TYPE x211 = input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y1, in_x2)];
OUTPUT_TYPE x121 = input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y2, in_x1)];
OUTPUT_TYPE x221 = input[INPUT0_GET_INDEX(batch, feature, in_z1, in_y2, in_x2)];
OUTPUT_TYPE x112 = input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y1, in_x1)];
OUTPUT_TYPE x212 = input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y1, in_x2)];
OUTPUT_TYPE x122 = input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y2, in_x1)];
OUTPUT_TYPE x222 = input[INPUT0_GET_INDEX(batch, feature, in_z2, in_y2, in_x2)];
#endif
ACCUMULATOR_TYPE interp_val = dx2 * dy2 * dz2 * x111 + dx1 * dy2 * dz2 * x211;
interp_val += dx2 * dy1 * dz2 * x121 + dx1 * dy1 * dz2 * x221;
interp_val += dx2 * dy2 * dz1 * x112 + dx1 * dy2 * dz1 * x212;
interp_val += dx2 * dy1 * dz1 * x122 + dx1 * dy1 * dz1 * x222;
#if HAS_FUSED_OPS
#define OF_ID (feature)
FUSED_OPS;
OUTPUT_TYPE res = FUSED_OPS_RESULT;
#undef OF_ID
#else
OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
#endif
output[OUTPUT_GET_INDEX(batch, feature, oz, oy, ox)] = res;
#endif // #if OUTPUT_DIMS == 5
#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
const int ox = get_global_id(0);
const int oy = get_global_id(1);

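For reference, here is a host-side C++ sketch (mine, not taken from the kernel) of what the OUTPUT_DIMS == 5 branch above computes in the unpadded case: a half_pixel coordinate transform with the scale derived from the sizes, clamping to the input extent, and an eight-corner blend with the same dx/dy/dz weights the kernel uses. Padding handling and the other coordinate-transform modes are omitted.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// in:  dense NCDHW float tensor of shape {N, C, D, H, W}
// out: dense NCDHW float tensor of shape {N, C, OD, OH, OW}
std::vector<float> trilinear_onnx_ref(const std::vector<float>& in,
                                      size_t N, size_t C, size_t D, size_t H, size_t W,
                                      size_t OD, size_t OH, size_t OW) {
    auto src = [&](size_t n, size_t c, size_t z, size_t y, size_t x) {
        return in[(((n * C + c) * D + z) * H + y) * W + x];
    };
    // half_pixel coordinate transformation, scale derived as in_len / out_len (sizes-driven variant)
    auto orig = [](size_t o, size_t out_len, size_t in_len) {
        float scale = static_cast<float>(in_len) / out_len;
        return (o + 0.5f) * scale - 0.5f;
    };
    std::vector<float> out(N * C * OD * OH * OW);
    size_t idx = 0;
    for (size_t n = 0; n < N; ++n)
    for (size_t c = 0; c < C; ++c)
    for (size_t oz = 0; oz < OD; ++oz)
    for (size_t oy = 0; oy < OH; ++oy)
    for (size_t ox = 0; ox < OW; ++ox) {
        float iz = std::clamp(orig(oz, OD, D), 0.0f, float(D - 1));
        float iy = std::clamp(orig(oy, OH, H), 0.0f, float(H - 1));
        float ix = std::clamp(orig(ox, OW, W), 0.0f, float(W - 1));
        size_t z1 = size_t(iz), z2 = std::min(z1 + 1, D - 1);
        size_t y1 = size_t(iy), y2 = std::min(y1 + 1, H - 1);
        size_t x1 = size_t(ix), x2 = std::min(x1 + 1, W - 1);
        // weights; 0.5 / 0.5 when the two neighbours coincide, as in the kernel
        float dx1 = (x1 != x2) ? ix - x1 : 0.5f, dx2 = (x1 != x2) ? x2 - ix : 0.5f;
        float dy1 = (y1 != y2) ? iy - y1 : 0.5f, dy2 = (y1 != y2) ? y2 - iy : 0.5f;
        float dz1 = (z1 != z2) ? iz - z1 : 0.5f, dz2 = (z1 != z2) ? z2 - iz : 0.5f;
        out[idx++] = dx2 * dy2 * dz2 * src(n, c, z1, y1, x1) + dx1 * dy2 * dz2 * src(n, c, z1, y1, x2) +
                     dx2 * dy1 * dz2 * src(n, c, z1, y2, x1) + dx1 * dy1 * dz2 * src(n, c, z1, y2, x2) +
                     dx2 * dy2 * dz1 * src(n, c, z2, y1, x1) + dx1 * dy2 * dz1 * src(n, c, z2, y1, x2) +
                     dx2 * dy1 * dz1 * src(n, c, z2, y2, x1) + dx1 * dy1 * dz1 * src(n, c, z2, y2, x2);
    }
    return out;
}
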
View File

@ -52,16 +52,20 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
auto interpolateMode = attrs.mode;
if (interpolateMode == ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX) {
- if (inputRank != 2 && inputRank != 4)
-     IE_THROW() << "mode 'linear_onnx' supports only 2D or 4D tensors";
- if (axes.size() != 2 && inputRank != axes.size())
-     IE_THROW() << "mode 'linear_onnx' supports only axes with size 2 or equal to input rank";
+ if (inputRank != 2 && inputRank != 4 && inputRank != 5)
+     IE_THROW() << "mode 'linear_onnx' supports only 2D or 4D, 5D tensors";
+ if (axes.size() != 2 && axes.size() != 3 && inputRank != axes.size())
+     IE_THROW() << "mode 'linear_onnx' supports only axes with size 2, 3 or equal to input rank";
bool correctAxes =
-     (axes[0] == 0 && axes[1] == 1) ||
+     (((axes.size() == 2 || axes.size() == 4) && inputRank != 5) &&
+     ((axes[0] == 0 && axes[1] == 1) ||
    (axes[0] == 1 && axes[1] == 0) ||
    (axes[0] == 2 && axes[1] == 3) ||
-     (axes[0] == 3 && axes[1] == 2);
- if (axes.size() == 4 && inputRank == 4) {
+     (axes[0] == 3 && axes[1] == 2))) ||
+     ((axes.size() == 3 || axes.size() == 5) && inputRank == 5 &&
+     ((axes[0] == 0 && axes[1] == 1 && axes[2] == 2) ||
+     (axes[0] == 2 && axes[1] == 3 && axes[2] == 4)));
+ if ((axes.size() == 4 && inputRank == 4) || (axes.size() == 5 && inputRank == 5)) {
for (size_t i = 0; i < axes.size(); i++) {
if (std::find(axes.begin(), axes.end(), i) == axes.end()) {
correctAxes = false;
@ -72,7 +76,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
if (!correctAxes)
IE_THROW() <<
"mode 'linear_onnx' supports only case when axes = {2, 3} or "
"axes = {0, 1} or axes = {0, 1, 2, 3}";
"axes = {0, 1} or axes = {0, 1, 2, 3} or axes = {2, 3, 4} for 5d";
}
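
Restated compactly (a sketch of my own, slightly simplified, not plugin code), the axes rule enforced above for mode 'linear_onnx' is: 2-D inputs accept {0, 1}; 4-D inputs accept {0, 1}, {2, 3} or the full axis set; 5-D inputs accept {0, 1, 2}, {2, 3, 4} or the full axis set. The helper name below is hypothetical.

#include <cstddef>
#include <cstdint>
#include <set>
#include <vector>

bool linear_onnx_axes_supported(std::size_t rank, const std::vector<int64_t>& axes) {
    const std::set<int64_t> s(axes.begin(), axes.end());
    if (rank == 2)
        return s == std::set<int64_t>{0, 1};
    if (rank == 4)
        return s == std::set<int64_t>{0, 1} || s == std::set<int64_t>{2, 3} ||
               s == std::set<int64_t>{0, 1, 2, 3};
    if (rank == 5)  // the combinations added by this commit
        return s == std::set<int64_t>{0, 1, 2} || s == std::set<int64_t>{2, 3, 4} ||
               s == std::set<int64_t>{0, 1, 2, 3, 4};
    return false;
}
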
auto resamplePrim = cldnn::resample(layerName,

View File

@ -1703,6 +1703,165 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) {
}
}
static tensor create_tensor(const std::vector<int64_t>& shape) {
switch (shape.size()) {
case 4:
return tensor{batch(shape[0]), feature(shape[1]), spatial(shape[2], shape[3])};
break;
case 5:
return tensor{batch(shape[0]), feature(shape[1]), spatial(shape[4], shape[3], shape[2])};
break;
default:
throw std::runtime_error("Only 4d or 5d formats are supported");
}
}
template <cldnn::format::type FMT>
struct format_wrapper {
static constexpr format fmt = FMT;
};
template <typename T>
struct onnx_5d_format : public ::testing::Test {
onnx_5d_format() : shapes_and_attrs {// resize_downsample_scales_linear
{{1, 1, 3, 2, 4},
{2, 3, 4},
{1, 1, 2, 1, 2},
{0.8f, 0.6f, 0.6f},
resample::InterpolateOp::CoordinateTransformMode::HALF_PIXEL,
resample::InterpolateOp::ShapeCalcMode::SCALES},
// resize_downsample_scales_linear_align_corners
{{1, 1, 3, 2, 4},
{2, 3, 4},
{1, 1, 2, 1, 2},
{0.8f, 0.6f, 0.6f},
resample::InterpolateOp::CoordinateTransformMode::ALIGN_CORNERS,
resample::InterpolateOp::ShapeCalcMode::SCALES},
// resize_upsample_scales_linear
{{1, 1, 2, 2, 2},
{2, 3, 4},
{1, 1, 4, 4, 4},
{2.0, 2.0, 2.0},
resample::InterpolateOp::CoordinateTransformMode::HALF_PIXEL,
resample::InterpolateOp::ShapeCalcMode::SCALES},
// resize_upsample_scales_linear_align_corners
{{1, 1, 2, 2, 2},
{2, 3, 4},
{1, 1, 4, 4, 4},
{2.0, 2.0, 2.0},
resample::InterpolateOp::CoordinateTransformMode::ALIGN_CORNERS,
resample::InterpolateOp::ShapeCalcMode::SCALES},
// resize_downsample_sizes_linear_pytorch_half_pixel
{{1, 1, 2, 4, 4},
{2, 3, 4},
{1, 1, 1, 3, 1},
{0.5, 0.75, 0.25},
resample::InterpolateOp::CoordinateTransformMode::PYTORCH_HALF_PIXEL,
resample::InterpolateOp::ShapeCalcMode::SIZES}
}
, input_data_list {
// resize_downsample_scales_linear
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f},
// resize_downsample_scales_linear_align_corners
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f},
// resize_upsample_scales_linear
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f},
// resize_upsample_scales_linear_align_corners
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f},
// resize_downsample_sizes_linear_pytorch_half_pixel
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f,
23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f}}
, expected_results {
// resize_downsample_scales_linear
{3.6666665, 5.333333, 13.666666, 15.333333},
// resize_downsample_scales_linear_align_corners
{1.0, 4.0, 17.0, 20.0},
// resize_upsample_scales_linear
{1.0, 1.25, 1.75, 2.0, 1.5, 1.75, 2.25, 2.5, 2.5, 2.75, 3.25, 3.5, 3.0, 3.25, 3.75, 4.0,
2.0, 2.25, 2.75, 3.0, 2.5, 2.75, 3.25, 3.5, 3.5, 3.75, 4.25, 4.5, 4.0, 4.25, 4.75, 5.0,
4.0, 4.25, 4.75, 5.0, 4.5, 4.75, 5.25, 5.5, 5.5, 5.75, 6.25, 6.5, 6.0, 6.25, 6.75, 7.0,
5.0, 5.25, 5.75, 6.0, 5.5, 5.75, 6.25, 6.5, 6.5, 6.75, 7.25, 7.5, 7.0, 7.25, 7.75, 8.0},
// resize_upsample_scales_linear_align_corners
{1.0, 1.3333333, 1.6666667, 2.0, 1.6666666, 2.0, 2.3333335, 2.6666667, 2.3333333, 2.6666665,
3.0, 3.3333335, 3.0, 3.3333333, 3.6666665, 4.0, 2.3333335, 2.6666665, 3.0, 3.3333333,
3.0, 3.333333, 3.6666665, 3.9999995, 3.6666665, 4.0, 4.3333335, 4.6666665, 4.333333, 4.6666665,
4.9999995, 5.333333, 3.6666667, 4.0, 4.3333335, 4.6666665, 4.3333335, 4.6666665, 5.0, 5.333333,
5.0, 5.3333335, 5.666667, 6.0, 5.666667, 5.9999995, 6.333333, 6.666667, 5.0, 5.333333,
5.6666665, 6.0, 5.666667, 5.9999995, 6.333333, 6.666666, 6.3333335, 6.666666, 7.0, 7.3333335,
7.0, 7.333333, 7.6666675, 8.0},
// resize_downsample_sizes_linear_pytorch_half_pixel
{1.6666667, 7.0, 12.333333}}
, fmt{T::fmt}
{}
struct ShapesAndAttrs {
std::vector<int64_t> input_data_shape;
std::vector<int64_t> axes;
std::vector<int64_t> out_shape;
std::vector<float> scales_data;
resample::InterpolateOp::CoordinateTransformMode transform_mode;
resample::InterpolateOp::ShapeCalcMode calculation_mode;
};
std::vector<ShapesAndAttrs> shapes_and_attrs;
std::vector<std::vector<float>> input_data_list;
std::vector<std::vector<float>> expected_results;
format fmt;
};
using cldnn_5d_formats = testing::Types<format_wrapper<format::bfzyx>,
format_wrapper<format::bs_fs_zyx_bsv16_fsv32>,
format_wrapper<format::bs_fs_zyx_bsv16_fsv16>,
format_wrapper<format::bs_fs_zyx_bsv32_fsv32>,
format_wrapper<format::bs_fs_zyx_bsv32_fsv16>>;
TYPED_TEST_SUITE(onnx_5d_format, cldnn_5d_formats);
TYPED_TEST(onnx_5d_format, interpolate_linear_onnx5d)
{
auto& engine = get_test_engine();
std::size_t i = 0;
for (const auto& s : this->shapes_and_attrs) {
tensor input_tensor = create_tensor(s.input_data_shape);
auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, input_tensor });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reorder("input_reordered", "input", this->fmt, data_types::f32));
int32_t antialias = 0;
float cube_coeff = -0.75f;
resample::InterpolateOp::InterpolateMode mode = resample::InterpolateOp::InterpolateMode::LINEAR_ONNX;
resample::InterpolateOp::CoordinateTransformMode ctm = s.transform_mode;
resample::InterpolateOp::ShapeCalcMode shapeCalcMode = s.calculation_mode;
topology.add(resample("interpolate", "input_reordered", s.out_shape, s.scales_data, s.axes,
{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, antialias, cube_coeff, mode, shapeCalcMode, ctm));
topology.add(reorder("output", "interpolate", format::bfzyx, data_types::f32));
set_values(input, this->input_data_list[i]);
cldnn::network net {engine, topology };
net.set_input_data("input", input);
auto outputs = net.execute();
auto output = outputs.at("output").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
ASSERT_EQ(this->expected_results[i].size(), output_ptr.size());
for (size_t j = 0; j < this->expected_results[i].size(); ++j) {
EXPECT_NEAR(this->expected_results[i][j], output_ptr[j], 0.001) << j;
}
++i;
}
}
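
As a cross-check of the fixture data (a hand computation, not part of the test), the first value of expected_results[0] can be reproduced directly: the resize_downsample_scales_linear case has a 1x1x3x2x4 input filled with 1..24, scales {0.8, 0.6, 0.6} over axes {2, 3, 4} and the half_pixel transform, so output voxel (0, 0, 0) maps to input coordinate (0.125, 1/3, 1/3) and blends to 11/3 ≈ 3.6666665. The lambdas below are ad hoc helpers; clamping is omitted because this sample point does not need it.

#include <cassert>
#include <cmath>

int main() {
    auto v = [](int z, int y, int x) { return float(z * 8 + y * 4 + x + 1); };  // input value at (z, y, x)
    auto orig = [](int o, float scale) { return (o + 0.5f) / scale - 0.5f; };   // half_pixel with a given scale

    const float iz = orig(0, 0.8f);  // 0.125
    const float iy = orig(0, 0.6f);  // 1/3
    const float ix = orig(0, 0.6f);  // 1/3

    // bilinear blend inside a z-slice, then linear blend along z
    auto bilin = [&](int z, float y, float x) {
        const int y1 = int(y), x1 = int(x);
        const float dy = y - y1, dx = x - x1;
        return (1 - dy) * ((1 - dx) * v(z, y1, x1)     + dx * v(z, y1, x1 + 1)) +
               dy       * ((1 - dx) * v(z, y1 + 1, x1) + dx * v(z, y1 + 1, x1 + 1));
    };
    const int z1 = int(iz);
    const float dz = iz - z1;
    const float res = (1 - dz) * bilin(z1, iy, ix) + dz * bilin(z1 + 1, iy, ix);
    assert(std::fabs(res - 3.6666665f) < 1e-4f);  // matches expected_results[0][0]
    return 0;
}
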
TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) {
// Input : 1x1x2x4
// Output : 1x1x1x2

View File

@ -9,6 +9,30 @@
using namespace LayerTestsDefinitions;
class GPUInterpolateLayerTest : public InterpolateLayerTest {
protected:
void SetUp() override {
InterpolateLayerTest::SetUp();
InterpolateLayerTestParams params = GetParam();
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(std::ignore, netPrecision, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
std::ignore, targetDevice, std::ignore) = params;
// Rounding of float to integer types on GPU may differ from the CPU, and as a result
// the actual values may differ from the reference ones by 1 when the float is very close
// to an integer, e.g. a value computed as 6.0000023 on the CPU may come out just below 6
// on the GPU and be truncated to 5 by the OpenCL convert_uchar function.
// That is why the threshold is set to 1.f for integer precisions.
if (targetDevice == "GPU" &&
(netPrecision == InferenceEngine::Precision::U8 || netPrecision == InferenceEngine::Precision::I8)) {
threshold = 1.f;
}
}
};
TEST_P(GPUInterpolateLayerTest, CompareWithRefs) {
Run();
}
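
A tiny illustration (not from the test suite; the two float values are made up) of the rounding gap the 1.f threshold above accounts for: OpenCL's convert_uchar truncates toward zero by default, so a GPU result that lands just below an integer drops to the next value down, while a CPU reference that lands just above it does not.

#include <cstdio>

int main() {
    const float gpu_val = 5.9999995f;  // hypothetical value produced by the GPU kernel
    const float cpu_val = 6.0000023f;  // hypothetical value produced by the CPU reference
    // static_cast stands in for OpenCL convert_uchar; both truncate toward zero here
    const unsigned char gpu_u8 = static_cast<unsigned char>(gpu_val);  // 5
    const unsigned char cpu_u8 = static_cast<unsigned char>(cpu_val);  // 6
    std::printf("%d vs %d -> off by %d\n", int(gpu_u8), int(cpu_u8), int(cpu_u8) - int(gpu_u8));
    return 0;
}
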
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
@ -16,6 +40,13 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
const std::vector<InferenceEngine::Precision> netOnnx5dPrecisions = {
InferenceEngine::Precision::I8,
InferenceEngine::Precision::U8,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32,
};
const std::vector<std::vector<size_t>> inShapes = {
{1, 1, 23, 23},
};
@ -24,6 +55,14 @@ const std::vector<std::vector<size_t>> targetShapes = {
{1, 1, 46, 46},
};
const std::vector<std::vector<size_t>> in5dShapes = {
{1, 1, 2, 2, 2},
};
const std::vector<std::vector<size_t>> target5dShapes = {
{1, 1, 4, 4, 4},
};
const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> modesWithoutNearest = {
ngraph::op::v4::Interpolate::InterpolateMode::linear,
ngraph::op::v4::Interpolate::InterpolateMode::cubic,
@ -34,6 +73,10 @@ const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> nearestMode = {
ngraph::op::v4::Interpolate::InterpolateMode::nearest,
};
const std::vector<ngraph::op::v4::Interpolate::InterpolateMode> linearOnnxMode = {
ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx,
};
const std::vector<ngraph::op::v4::Interpolate::CoordinateTransformMode> coordinateTransformModes = {
ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn,
ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel,
@ -64,6 +107,15 @@ const std::vector<std::vector<size_t>> pads = {
{0, 0, 0, 0},
};
const std::vector<std::vector<size_t>> pads5dbegin = {
{0, 0, 1, 1, 1},
{0, 0, 0, 0, 0},
};
const std::vector<std::vector<size_t>> pads5dend = {
{0, 0, 1, 1, 1},
{0, 0, 0, 0, 0},
};
const std::vector<bool> antialias = {
// Not enabled in Inference Engine
// true,
@ -84,6 +136,14 @@ const std::vector<std::vector<float>> defaultScales = {
{1.f, 1.f, 2.f, 2.f}
};
const std::vector<std::vector<int64_t>> default5dAxes = {
{0, 1, 2, 3, 4}
};
const std::vector<std::vector<float>> default5dScales = {
{1.f, 1.f, 2.f, 2.f, 2.f}
};
std::map<std::string, std::string> additional_config = {};
const auto interpolateCasesWithoutNearest = ::testing::Combine(
@ -122,6 +182,18 @@ const auto interpolateCasesNearesMode = ::testing::Combine(
::testing::ValuesIn(defaultAxes),
::testing::ValuesIn(defaultScales));
const auto interpolate5dCasesLinearOnnxMode = ::testing::Combine(
::testing::ValuesIn(linearOnnxMode),
::testing::ValuesIn(shapeCalculationMode),
::testing::ValuesIn(coordinateTransformModes),
::testing::ValuesIn(nearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads5dbegin),//pad begin
::testing::ValuesIn(pads5dend),//pad ends
::testing::ValuesIn(cubeCoefs),
::testing::ValuesIn(default5dAxes),
::testing::ValuesIn(default5dScales));
INSTANTIATE_TEST_SUITE_P(smoke_Interpolate_Basic, InterpolateLayerTest, ::testing::Combine(
interpolateCasesWithoutNearest,
::testing::ValuesIn(netPrecisions),
@ -161,4 +233,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Interpolate_Nearest, InterpolateLayerTest, ::test
::testing::Values(additional_config)),
InterpolateLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Interpolate_5dLinearOnnx, GPUInterpolateLayerTest, ::testing::Combine(
interpolate5dCasesLinearOnnxMode,
::testing::ValuesIn(netOnnx5dPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(in5dShapes),
::testing::ValuesIn(target5dShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(additional_config)),
InterpolateLayerTest::getTestCaseName);
} // namespace