diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp
index 3a9457bcd4f..8b43b591405 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv4_int8.cpp
@@ -23,11 +23,16 @@ constexpr size_t sub_group_size = 16;
 
 ParamsKey ConvolutionKernel_b_fs_yx_fsv4_int8::GetSupportedKey() const {
     ParamsKey k;
-    k.EnableOutputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT8);
-    k.EnableInputWeightsType(WeightsType::INT8);
-    k.EnableOutputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+
+    k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableTensorOffset();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp
index 3028213f300..cb1d2d3dd5a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp
@@ -73,6 +73,7 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const {
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
 
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp
index eebc432a30a..adc69bf6462 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_1x1.cpp
@@ -74,11 +74,15 @@ ParamsKey ConvolutionKernel_imad_b_fs_yx_fsv4_1x1::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableDifferentTypes();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp
index ca90f840cc6..43b83e2681a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_b_fs_yx_fsv4_dw.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -72,11 +72,15 @@ ParamsKey ConvolutionKernel_imad_b_fs_yx_fsv4_dw::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableDifferentTypes();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp
index 1bb73a1e978..4205064bb57 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.cpp
@@ -34,6 +34,7 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetSupportedKey() co
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
 
     k.EnableInputWeightsType(WeightsType::INT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp
index cb6515063d8..f87066a0408 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.cpp
@@ -34,6 +34,7 @@ ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetSupportedKey() co
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
 
     k.EnableInputWeightsType(WeightsType::INT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp
index 6943795471b..01eefa02cbb 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32.cpp
@@ -25,10 +25,14 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
     k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp
index f1963f1ecf4..35926d89412 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_b_fs_yx_fsv32_dw.cpp
@@ -26,10 +26,14 @@ ParamsKey ConvolutionKernel_mmad_b_fs_yx_fsv32_dw::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
     k.EnableTensorOffset();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp
index 81c686912f0..335ba9d3707 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.cpp
@@ -27,10 +27,14 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv32::GetSupportedKey() const
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp
index 520e77ea6f0..2aef38410ed 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv4.cpp
@@ -25,10 +25,14 @@ ParamsKey ConvolutionKernel_mmad_bfyx_to_b_fs_yx_fsv4::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::byxf_af32);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_blocks.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_blocks.cpp
index 1241809f1c7..90ff761aaf2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_blocks.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_blocks.cpp
@@ -43,11 +43,14 @@ ParamsKey ConvolutionKernel_mmad_blocks::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableInputLayout(DataLayout::byxf_af32);
     k.EnableOutputLayout(DataLayout::byxf_af32);
     k.EnableTensorOffset();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp
index 4c4e77a06d3..8eae48e4ca2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_along_f_tile_bfx.cpp
@@ -31,10 +31,12 @@ ParamsKey DeconvolutionKernel_imad_along_f_tile_bfx::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp
index 9edde95f13e..840d83a9eef 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_imad_ref.cpp
@@ -25,10 +25,12 @@ ParamsKey DeconvolutionKernel_imad_ref::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
index 948e832dc4b..ded8ebbe34f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@ ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const {
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
 
     k.EnableInputWeightsType(WeightsType::INT8);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp
index ea6392e4f39..441a3a409e1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad.cpp
@@ -25,11 +25,14 @@ ParamsKey FullyConnectedKernelMMAD::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
+
     k.EnableInputWeightsType(WeightsType::INT8);
+
     k.EnableDifferentInputWeightsTypes();
     k.EnableDifferentTypes();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp
index c1954780e80..df5534a0478 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8.cpp
@@ -23,10 +23,12 @@ ParamsKey GemmKernelMMADint8::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
     k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::bfzyx);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp
index b62ca73a81c..0b1f3074cd8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_mmad_int8_slm.cpp
@@ -23,10 +23,12 @@ ParamsKey GemmKernelMMADslmInt8::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
     k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::bfzyx);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp
index fe6d6950955..80955a17238 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp
@@ -30,10 +30,12 @@ ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp
index 14ea8d30252..63a7a3425cd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -25,10 +25,12 @@ ParamsKey MVNKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableAllInputLayout();
     k.EnableAllOutputLayout();
     k.EnableTensorOffset();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp
index 0a596398405..802b2181598 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_zyx_fsv16_imad.cpp
@@ -25,6 +25,7 @@ ParamsKey PoolingKernelGPU_b_fs_zyx_fsv16_imad::GetSupportedKey() const {
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp
index b726f2ed630..a0af34fedab 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bs_fs_yx_bsv16_fsv16.cpp
@@ -30,6 +30,7 @@ ParamsKey Pooling_kernel_gpu_bs_fs_yx_bsv_16_fsv16::GetSupportedKey() const {
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
 
     k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
     k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp
index 1111df860f7..cdb53d8e26f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reduce/reduce_kernel_ref.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,10 +26,14 @@ ParamsKey ReduceKernelRef::GetSupportedKey() const {
     k.EnableInputDataType(Datatype::F32);
     k.EnableInputDataType(Datatype::INT32);
     k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::INT32);
     k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+
     k.EnableInputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableInputLayout(DataLayout::bfzyx);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl
index 1f0dc987057..b551bca5ce5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -92,7 +92,7 @@ KERNEL(fully_connected_gpu_IMAD)(
 
     output[out_index] = res;
 #else
-    output[out_index] = dequantized;
+    output[out_index] = TO_OUTPUT_TYPE(dequantized);
 #endif
 }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
index 7f0a4535b5e..1b44664ecbf 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
@@ -191,9 +191,11 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
        { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
         // { std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp
index 0533bfd965e..6b273640b0e 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -97,9 +97,11 @@ attach_resample_gpu::attach_resample_gpu() {
         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), resample_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf_af32), resample_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf_af32), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), resample_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf_af32), resample_gpu::create}});
 }
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
index 9f094384af7..07134fa82e8 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
@@ -2353,6 +2353,21 @@ TEST_P(fc_int8_scale, basic) {
     execute(p);
 }
 
+TEST_P(fc_int8_scale, fp16_scale_out) {
+    auto p = GetParam();
+    create_topologies(input_layout("input", get_input_layout(p)),
+        data("weights", get_mem(get_weights_layout(p))),
+        data("bias", get_mem(get_bias_layout(p))),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count())),
+        fully_connected("fc_prim", "input", "weights", "bias", data_types::f32),
+        scale("scale", "fc_prim", "scale_data", optional_data_type{data_types::f16}),
+        reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
 INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_int8_scale,
     ::testing::ValuesIn(std::vector<bc_test_params>{
     bc_test_params{ CASE_FC_U8S8_1, 2, 3 },
@@ -2523,6 +2538,45 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_2in_quantize_u8,
     gemm_test_params{ CASE_GEMM_2IN_U8U8_3, 3, 4 },
 }), );
 
+class gemm_2in_scale : public GemmFusingTest {};
+TEST_P(gemm_2in_scale, basic) {
+    auto p = GetParam();
+    create_topologies(input_layout("input0", get_input_layout(p, 0)),
+        input_layout("input1", get_input_layout(p, 1)),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+        gemm("gemm_prim", { "input0", "input1" }, data_types::f32),
+        scale("scale", "gemm_prim", "scale_data"),
+        reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(gemm_2in_scale, fp16_scale_out) {
+    auto p = GetParam();
+    create_topologies(input_layout("input0", get_input_layout(p, 0)),
+        input_layout("input1", get_input_layout(p, 1)),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+        gemm("gemm_prim", { "input0", "input1" }, data_types::f32),
+        scale("scale", "gemm_prim", "scale_data", optional_data_type{data_types::f16}),
+        reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
+    );
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_2in_scale,
+    ::testing::ValuesIn(std::vector<gemm_test_params>{
+    gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 },
+    gemm_test_params{ CASE_GEMM_2IN_FP16_1, 3, 4 },
+    gemm_test_params{ CASE_GEMM_2IN_U8U8_1, 3, 4 },
+    gemm_test_params{ CASE_GEMM_2IN_U8U8_2, 3, 4 },
+    gemm_test_params{ CASE_GEMM_2IN_U8U8_3, 3, 4 },
+}), );
+
 class gemm_2in_act_scale_quantize_i8 : public GemmFusingTest {};
 TEST_P(gemm_2in_act_scale_quantize_i8, basic) {
     auto p = GetParam();
@@ -3882,6 +3936,74 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv,
     deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
 }), );
 
+class deconv_scale : public DeconvolutionFusingTest {};
+TEST_P(deconv_scale, basic) {
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("weights", get_mem(get_weights_layout(p))),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+        deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+        scale("scale", "deconv", "scale_data"),
+        reorder("out", "scale", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+TEST_P(deconv_scale, fp16_scale_out) {
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("weights", get_mem(get_weights_layout(p))),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
+        deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+        scale("scale", "deconv", "scale_data", optional_data_type{data_types::f16}),
+        reorder("out", "scale", p.default_format, data_types::f32)
+    );
+    tolerance = 1e-5f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale,
+    ::testing::ValuesIn(std::vector<deconv_test_params>{
+    deconv_test_params{ CASE_DECONV_U8S8_1, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_2, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_4, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_5, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_6, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_7, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_8, 2, 3 },
+
+    deconv_test_params{ CASE_DECONV_S8S8_1, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_2, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_4, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_5, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_6, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_7, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_8, 2, 3 },
+
+    deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 3 },
+    deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 3 },
+
+    deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 3 },
+    deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
+}), );
+
 class deconv_actv_eltw_actv : public DeconvolutionFusingTest {};
 TEST_P(deconv_actv_eltw_actv, basic) {
     auto p = GetParam();
@@ -4440,6 +4562,19 @@ TEST_P(pooling_f32_scale, basic) {
     execute(p);
 }
 
+TEST_P(pooling_f32_scale, fp16_scale_out) {
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / tensor{1, 1, 3, 3}.count())),
+        pooling("pooling", "input", p.pool_mode, tensor{1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}),
+        scale("scale", "pooling", "scale_data", optional_data_type{data_types::f16}),
+        reorder("output_reorder", "scale", format::bfyx, data_types::f32));
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
 INSTANTIATE_TEST_CASE_P(fusings_gpu, pooling_f32_scale,
     ::testing::ValuesIn(std::vector<pooling_test_params>{