[IE CLDNN] FP16 support for some quantized kernels (#1855)

This commit is contained in:
Vladimir Paramuzov 2020-08-24 08:47:45 +03:00 committed by GitHub
parent d4567d5ab5
commit c8aa3a7f2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 206 additions and 10 deletions

View File

@ -23,11 +23,16 @@ constexpr size_t sub_group_size = 16;
ParamsKey ConvolutionKernel_b_fs_yx_fsv4_int8::GetSupportedKey() const {
ParamsKey k;
k.EnableOutputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableTensorOffset();

View File

@ -73,6 +73,7 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::UINT8);

View File

@ -74,11 +74,15 @@ ParamsKey ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableDifferentTypes();

View File

@ -1,4 +1,4 @@
// Copyright (c) 2018-2019 Intel Corporation
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -72,11 +72,15 @@ ParamsKey ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableDifferentTypes();

View File

@ -34,6 +34,7 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetSupportedKey() co
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);

View File

@ -34,6 +34,7 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetSupportedKey() co
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);

View File

@ -25,10 +25,14 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);

View File

@ -26,10 +26,14 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableTensorOffset();

View File

@ -27,10 +27,14 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetSupportedKey() const
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);

View File

@ -25,10 +25,14 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::byxf_af32);

View File

@ -43,11 +43,14 @@ ParamsKey ConvolutionKernel_mmad_blocks::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputLayout(DataLayout::byxf_af32);
k.EnableOutputLayout(DataLayout::byxf_af32);
k.EnableTensorOffset();

View File

@ -31,10 +31,12 @@ ParamsKey DeconvolutionKernel_imad_along_f_tile_bfx::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::UINT8);

View File

@ -25,10 +25,12 @@ ParamsKey DeconvolutionKernel_imad_ref::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::UINT8);

View File

@ -1,4 +1,4 @@
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -29,6 +29,7 @@ ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);

View File

@ -25,11 +25,14 @@ ParamsKey FullyConnectedKernelMMAD::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableDifferentInputWeightsTypes();
k.EnableDifferentTypes();

View File

@ -23,10 +23,12 @@ ParamsKey GemmKernelMMADint8::GetSupportedKey() const {
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);

View File

@ -23,10 +23,12 @@ ParamsKey GemmKernelMMADslmInt8::GetSupportedKey() const {
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);

View File

@ -30,10 +30,12 @@ ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const {
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);

View File

@ -1,4 +1,4 @@
// Copyright (c) 2018-2019 Intel Corporation
// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -25,10 +25,12 @@ ParamsKey MVNKernelRef::GetSupportedKey() const {
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();

View File

@ -25,6 +25,7 @@ ParamsKey PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);

View File

@ -30,6 +30,7 @@ ParamsKey Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -26,10 +26,14 @@ ParamsKey ReduceKernelRef::GetSupportedKey() const {
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);

View File

@ -1,4 +1,4 @@
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -92,7 +92,7 @@ KERNEL(fully_connected_gpu_IMAD)(
output[out_index] = res;
#else
output[out_index] = dequantized;
output[out_index] = TO_OUTPUT_TYPE(dequantized);
#endif
}

View File

@ -191,9 +191,11 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
//
{ std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2016-2019 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -97,9 +97,11 @@ attach_resample_gpu::attach_resample_gpu() {
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf_af32), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf_af32), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), resample_gpu::create},
{std::make_tuple(engine_types::ocl, data_types::u8, format::byxf_af32), resample_gpu::create}});
}

View File

@ -2353,6 +2353,21 @@ TEST_P(fc_int8_scale, basic) {
execute(p);
}
// Fusing test: int8 fully_connected -> scale forced to emit FP16
// (optional_data_type{f16}), then reordered back to FP32 for comparison.
// Exercises the new FP16 output support added to the quantized FC kernels.
TEST_P(fc_int8_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
// 1/count scaling keeps accumulated values small enough for fp16 range.
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count())),
fully_connected("fc_prim", "input", "weights", "bias", data_types::f32),
scale("scale", "fc_prim", "scale_data", optional_data_type{data_types::f16}),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);
// NOTE(review): 1e-5 looks tight for an fp16 intermediate; presumably the
// scaled values are exactly representable in fp16 here — confirm if flaky.
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_int8_scale,
::testing::ValuesIn(std::vector<bc_test_params>{
bc_test_params{ CASE_FC_U8S8_1, 2, 3 },
@ -2523,6 +2538,45 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_2in_quantize_u8,
gemm_test_params{ CASE_GEMM_2IN_U8U8_3, 3, 4 },
}), );
// Fixture for gemm + fused per-channel scale tests (two gemm inputs).
class gemm_2in_scale : public GemmFusingTest {};
// Baseline: gemm (f32 out) with a fused scale, default (f32) scale output.
TEST_P(gemm_2in_scale, basic) {
auto p = GetParam();
create_topologies(input_layout("input0", get_input_layout(p, 0)),
input_layout("input1", get_input_layout(p, 1)),
// 1/count scaling normalizes the dot-product magnitude.
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
gemm("gemm_prim", { "input0", "input1" }, data_types::f32),
scale("scale", "gemm_prim", "scale_data"),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
// Same topology as `basic`, but the fused scale is forced to produce FP16
// output (optional_data_type{f16}) before the final reorder to FP32 —
// covers the FP16 output path of the quantized gemm kernels.
TEST_P(gemm_2in_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(input_layout("input0", get_input_layout(p, 0)),
input_layout("input1", get_input_layout(p, 1)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
gemm("gemm_prim", { "input0", "input1" }, data_types::f32),
scale("scale", "gemm_prim", "scale_data", optional_data_type{data_types::f16}),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);
// NOTE(review): tolerance matches the f32 variant despite the fp16
// intermediate — verify this holds across the U8U8 cases below.
tolerance = 1e-5f;
execute(p);
}
// Runs both gemm_2in_scale tests over fp32, fp16 and three u8/u8 shapes.
// Trailing counts (3, 4) are the expected fused/unfused primitive counts.
INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_2in_scale,
::testing::ValuesIn(std::vector<gemm_test_params>{
gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 },
gemm_test_params{ CASE_GEMM_2IN_FP16_1, 3, 4 },
gemm_test_params{ CASE_GEMM_2IN_U8U8_1, 3, 4 },
gemm_test_params{ CASE_GEMM_2IN_U8U8_2, 3, 4 },
gemm_test_params{ CASE_GEMM_2IN_U8U8_3, 3, 4 },
}), );
class gemm_2in_act_scale_quantize_i8 : public GemmFusingTest {};
TEST_P(gemm_2in_act_scale_quantize_i8, basic) {
auto p = GetParam();
@ -3882,6 +3936,74 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv,
deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
}), );
// Fixture for deconvolution + fused per-channel scale tests.
class deconv_scale : public DeconvolutionFusingTest {};
// Baseline: deconv with a fused scale, default (f32) scale output.
TEST_P(deconv_scale, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
// 1/count scaling normalizes the accumulated sum per output element.
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
scale("scale", "deconv", "scale_data"),
reorder("out", "scale", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
// Same topology as `basic`, but the fused scale is forced to FP16 output
// (optional_data_type{f16}) — covers the FP16 output path added to the
// quantized (imad) deconvolution kernels.
TEST_P(deconv_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
scale("scale", "deconv", "scale_data", optional_data_type{data_types::f16}),
reorder("out", "scale", p.default_format, data_types::f32)
);
// NOTE(review): same 1e-5 tolerance as the f32 variant despite the fp16
// intermediate — confirm this is not flaky on larger 3D cases.
tolerance = 1e-5f;
execute(p);
}
// Runs both deconv_scale tests over all quantized 2D and 3D deconv cases
// (u8/s8 and s8/s8 input/weight combinations). Trailing counts (2, 3) are
// the expected fused/unfused primitive counts.
INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale,
::testing::ValuesIn(std::vector<deconv_test_params>{
deconv_test_params{ CASE_DECONV_U8S8_1, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_2, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_4, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_5, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_6, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_7, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_8, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_1, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_2, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_4, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_5, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_6, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 3 },
deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 3 },
deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
}), );
class deconv_actv_eltw_actv : public DeconvolutionFusingTest {};
TEST_P(deconv_actv_eltw_actv, basic) {
auto p = GetParam();
@ -4440,6 +4562,19 @@ TEST_P(pooling_f32_scale, basic) {
execute(p);
}
// Fusing test: 3x3 pooling followed by a scale forced to FP16 output
// (optional_data_type{f16}), reordered back to FP32 for comparison —
// covers the FP16 output path added to the quantized pooling kernels.
TEST_P(pooling_f32_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
// Scale by 1/9 (the 3x3 window size) so the scaled pooling result
// stays in a range exactly comparable against the reference.
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / tensor{1, 1, 3, 3}.count())),
pooling("pooling", "input", p.pool_mode, tensor{1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}),
scale("scale", "pooling", "scale_data", optional_data_type{data_types::f16}),
reorder("output_reorder", "scale", format::bfyx, data_types::f32));
// NOTE(review): 1e-5 tolerance with an fp16 intermediate — confirm the
// 1/9-scaled values round-trip through fp16 without exceeding this bound.
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_CASE_P(fusings_gpu,
pooling_f32_scale,
::testing::ValuesIn(std::vector<pooling_test_params>{