diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp index 6457cf85649..945524cdde0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.cpp @@ -27,12 +27,9 @@ ParamsKey LRNKernelAcrossChannelMultipleFeatures::GetSupportedKey() const { k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::b_fs_yx_fsv4); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); k.EnableInputLayout(DataLayout::yxfb); k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); k.EnableOutputLayout(DataLayout::yxfb); k.EnableLRNMode(LRNMode::ACROSS_CHANNEL); k.EnableLRNKernelDividerMode(KernelDividerMode::FIXED); @@ -64,8 +61,7 @@ CommonDispatchData LRNKernelAcrossChannelMultipleFeatures::SetDefault(const lrn_ unsigned int ofm_per_simd = GetOfmPerSimd(params); - if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::b_fs_yx_fsv4 || - input.GetLayout() == DataLayout::b_fs_yx_fsv16) { + if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::b_fs_yx_fsv4) { const auto& out = params.output; const unsigned int alignment = out.X().v > 16 ? 32 : 16; @@ -111,8 +107,8 @@ JitConstants LRNKernelAcrossChannelMultipleFeatures::GetJitConstants(const lrn_p unsigned int ofm_per_simd = GetOfmPerSimd(params); jit.AddConstant(MakeJitConstant("OFM_PER_SIMD", ofm_per_simd)); - if ((input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::b_fs_yx_fsv4 || - input.GetLayout() == DataLayout::b_fs_yx_fsv16) && output.X().v <= 16) { + if ((input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::b_fs_yx_fsv4) && + output.X().v <= 16) { jit.AddConstant(MakeJitConstant("FORCE_SIMD_16", 1)); } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h index 034facd0890..395bc90c440 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h @@ -22,12 +22,12 @@ namespace kernel_selector { class LRNKernelAcrossChannelMultipleFeatures : public LRNKernelBase { public: using Parent = LRNKernelBase; - LRNKernelAcrossChannelMultipleFeatures() : Parent("lrn_gpu_across_channel_multiple_features") {} + LRNKernelAcrossChannelMultipleFeatures(const std::string& s = "lrn_gpu_across_channel_multiple_features") : Parent(s) {} KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; -private: +protected: DispatchData SetDefault(const lrn_params& params) const override; std::vector GetSupportedFusedOps() const override { return { FusedOpType::QUANTIZE, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp new file mode 100644 index 00000000000..69fd39188b3 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.cpp @@ -0,0 +1,76 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "lrn_kernel_across_channel_multiple_features_fsv16.h" +#include "kernel_selector_utils.h" +#include + +namespace kernel_selector { +ParamsKey LRNKernelAcrossChannelMultipleFeaturesFSV16::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); + k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); + k.EnableLRNMode(LRNMode::ACROSS_CHANNEL); + k.EnableLRNKernelDividerMode(KernelDividerMode::FIXED); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDifferentTypes(); + return k; +} + +CommonDispatchData LRNKernelAcrossChannelMultipleFeaturesFSV16::SetDefault(const lrn_params& params) const { + CommonDispatchData runInfo = LRNKernelBase::SetDefault(params); + + const auto& out = params.output; + const unsigned int alignment = 16; + + std::vector global = {Align(out.Feature().v, alignment), + out.X().v, + out.Y().v * out.Batch().v}; + + auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + + runInfo.gws0 = global[0]; + runInfo.gws1 = global[1]; + runInfo.gws2 = global[2]; + + runInfo.lws0 = local[0]; + runInfo.lws1 = local[1]; + runInfo.lws2 = local[2]; + + runInfo.efficiency = FORCE_PRIORITY_6; + + return runInfo; +} + +JitConstants LRNKernelAcrossChannelMultipleFeaturesFSV16::GetJitConstants(const lrn_params& params, const DispatchData& kd) const { + JitConstants jit = LRNKernelBase::GetJitConstants(params, kd); + const auto& input_dt = params.inputs[0].GetDType(); + + if (!params.fused_ops.empty()) { + FusedOpsConfiguration conf = {"", {"batch_id", "feature_id", "y", "x"}, "lrn_result", input_dt}; + jit.Merge(MakeFusedOpsJitConstants(params, {conf})); + } + + return jit; +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h new file mode 100644 index 00000000000..782757726ad --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features_fsv16.h @@ -0,0 +1,32 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#pragma once + +#include "lrn_kernel_across_channel_multiple_features.h" + +namespace kernel_selector { +class LRNKernelAcrossChannelMultipleFeaturesFSV16 : public LRNKernelAcrossChannelMultipleFeatures { +public: + using Parent = LRNKernelAcrossChannelMultipleFeatures; + LRNKernelAcrossChannelMultipleFeaturesFSV16() : Parent("lrn_gpu_across_channel_multiple_features_fsv16") {} + + ParamsKey GetSupportedKey() const override; + +private: + DispatchData SetDefault(const lrn_params& params) const override; + JitConstants GetJitConstants(const lrn_params& params, const DispatchData& kd) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_selector.cpp index ddd1df16fc2..1677c849d72 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_selector.cpp @@ -20,6 +20,7 @@ #include "lrn_kernel_across_channel_ref.h" #include "lrn_kernel_across_channel_opt_b8.h" #include "lrn_kernel_across_channel_multiple_features.h" +#include "lrn_kernel_across_channel_multiple_features_fsv16.h" #include "lrn_kernel_within_channel_byxf_opt.h" namespace kernel_selector { @@ -31,9 +32,10 @@ lrn_kernel_selector::lrn_kernel_selector() { Attach(); Attach(); Attach(); + Attach(); } KernelsData lrn_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { return GetNaiveBestKernel(params, options, KernelType::LRN); } -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features.cl index 6e4c6ae6772..8aa56fbd920 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features.cl @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2017 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ KERNEL (lrn_gpu_across_channel_multiple_features)( #endif ) { -#if defined OUTPUT_LAYOUT_BFYX || defined OUTPUT_LAYOUT_B_FS_YX_FSV4 || defined OUTPUT_LAYOUT_B_FS_YX_FSV16 +#if defined OUTPUT_LAYOUT_BFYX || defined OUTPUT_LAYOUT_B_FS_YX_FSV4 // PERF NOTE: SIMD IS OVER global_id(0) so in SIMD global_id(1) and global_id(2) does not change, so we can use group_id to have SIMD1 instructions const uint x = get_global_id(0); const uint y = get_group_id(1); @@ -61,13 +61,12 @@ KERNEL (lrn_gpu_across_channel_multiple_features)( // prefetch for(uint j = 0; j < OFM_PER_SIMD; j++) { + bool zero = input_offset_f < 0 || input_offset_f >= INPUT0_FEATURE_NUM; #if !INPUT0_SIMPLE uint input_idx = INPUT0_GET_INDEX(batch_id, feature_id - PADDING + j, y, x); input_idx = MULTIPLY_OFFSET(INPUT0_TYPE, input_idx); - bool zero = input_offset_f < 0 || input_offset_f >= INPUT0_FEATURE_NUM; vals[j] = zero ? INPUT0_VAL_ZERO : TO_INPUT0_TYPE(ALPHA_VAL_FACTOR_DIV_BY_SIZE) * (*OFFSET_GLOBAL_PTR(INPUT0_TYPE, input, input_idx)); #else - bool zero = input_offset_f < 0 || input_offset_f >= INPUT0_FEATURE_NUM; vals[j] = zero ? INPUT0_VAL_ZERO : TO_INPUT0_TYPE(ALPHA_VAL_FACTOR_DIV_BY_SIZE) * (*OFFSET_GLOBAL_PTR(INPUT0_TYPE, input, input_idx)); input_idx += MULTIPLY_OFFSET(INPUT0_VAL_ZERO, INPUT0_FEATURE_PITCH); #endif @@ -100,10 +99,6 @@ KERNEL (lrn_gpu_across_channel_multiple_features)( for(uint j = 0; j < OFM_PER_SIMD; j++) { results[j] = mad(vals[j], vals[j], results[j]); - } - - for(uint j = 0; j < OFM_PER_SIMD; j++) - { results[j] = mad(results[j], TO_INPUT0_TYPE(ALPHA_DIV_BY_SIZE), TO_INPUT0_TYPE(K)); results[j] = native_powr(results[j], -TO_INPUT0_TYPE(BETA)); } @@ -123,8 +118,7 @@ KERNEL (lrn_gpu_across_channel_multiple_features)( lrn_result = results[j] * input[input_id]; #if HAS_FUSED_OPS FUSED_OPS; - OUTPUT_TYPE res = FUSED_OPS_RESULT; - output[output_idx] = res; + output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT); #else output[output_idx] = ACTIVATION(lrn_result, ACTIVATION_PARAMS); #endif diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features_fsv16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features_fsv16.cl new file mode 100644 index 00000000000..7baa5adcd17 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lrn_gpu_across_channel_multiple_features_fsv16.cl @@ -0,0 +1,58 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/common.cl" +#include "include/data_types.cl" +#include "include/fetch.cl" + +__attribute__((intel_reqd_sub_group_size(16))) +KERNEL (lrn_gpu_across_channel_multiple_features_fsv16)( + const __global INPUT0_TYPE* input, + __global OUTPUT_TYPE* output +#if HAS_FUSED_OPS_DECLS + , FUSED_OPS_DECLS +#endif + ) +{ + const uint feature_id = (uint)get_global_id(0); + const uint x = (uint)get_global_id(1); + const uint b_y = (uint)get_global_id(2); + const uint batch_id = b_y / INPUT0_SIZE_Y; + const uint y = b_y % INPUT0_SIZE_Y; + + if (feature_id >= INPUT0_FEATURE_NUM) + return; + + int input_offset_f = feature_id - PADDING; + + INPUT0_TYPE val[LOCAL_SIZE]; + INPUT0_TYPE res = 0; + for (uint i = 0; i < LOCAL_SIZE; ++i, ++input_offset_f) { + bool non_zero = input_offset_f >= 0 && input_offset_f < INPUT0_FEATURE_NUM; + uint input_idx = INPUT0_GET_INDEX(batch_id, input_offset_f, y, x); + val[i] = (int)non_zero * TO_INPUT0_TYPE(input[input_idx]); + res = mad(val[i], val[i], res); + } + res = mad(res, TO_INPUT0_TYPE(ALPHA_DIV_BY_SIZE), TO_INPUT0_TYPE(K)); + res = native_powr(res, -TO_INPUT0_TYPE(BETA)); + + uint output_idx = OUTPUT_GET_INDEX(batch_id, feature_id, y, x); + INPUT0_TYPE lrn_result = res * val[PADDING]; + #if HAS_FUSED_OPS + FUSED_OPS; + output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT); + #else + output[output_idx] = ACTIVATION(TO_OUTPUT_TYPE(lrn_result), ACTIVATION_PARAMS); + #endif +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 50280a8e9a7..25a0978ec05 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -3104,7 +3104,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, lrn_test_params{CASE_LRN_FP32_2, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt"}, lrn_test_params{CASE_LRN_FP32_3, 2, 5, lrn_norm_region_within_channel, "lrn_within_channel_byxf_opt"}, lrn_test_params{CASE_LRN_FP32_4, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, - lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, + lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16"}, // InputDataType = FP32 OutputDataType = FP16 lrn_test_params{CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_across_channel, "lrn_ref"}, @@ -3114,7 +3114,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, lrn_test_params{CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel"}, lrn_test_params{CASE_LRN_FP32_TO_FP16_3, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt"}, lrn_test_params{CASE_LRN_FP32_TO_FP16_4, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, - lrn_test_params{CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, + lrn_test_params{CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16"}, }), ); @@ -3155,7 +3155,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, lrn_test_params{CASE_LRN_FP32_2, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt"}, lrn_test_params{CASE_LRN_FP32_3, 2, 5, lrn_norm_region_within_channel, "lrn_within_channel_byxf_opt"}, lrn_test_params{CASE_LRN_FP32_4, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, - lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, + lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16"}, // InputDataType = FP16 OutputDataType = INT8/UINT8 can't be tested for now, because quantize // primitive doesn't support input type FP16 while fusing (prepare_quantization.cpp :114 -> prepare_primitive_fusing.cpp :474) @@ -3198,7 +3198,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, lrn_test_params{CASE_LRN_FP32_2, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt"}, lrn_test_params{CASE_LRN_FP32_3, 2, 5, lrn_norm_region_within_channel, "lrn_within_channel_byxf_opt"}, lrn_test_params{CASE_LRN_FP32_4, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, - lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, + lrn_test_params{CASE_LRN_FP32_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16"}, }), ); class lrn_fp16_scale_activation : public LrnFusingTest {}; @@ -3232,7 +3232,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, lrn_test_params{CASE_LRN_FP16_1, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_ref"}, lrn_test_params{CASE_LRN_FP16_3, 2, 4, lrn_norm_region_within_channel, "lrn_within_channel_byxf_opt"}, lrn_test_params{CASE_LRN_FP16_4, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, - lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"}, + lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16"}, }), ); /* ----------------------------------------------------------------------------------------------------- */ @@ -4058,19 +4058,19 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_activation_quantize_i8_eltwise ::testing::ValuesIn(std::vector{ conv_eltw_test_params{CASE_DECONV_ELTW_FP32_1, 5, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_FP32_2, 5, 7}, - conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 5, 7}, + conv_eltw_test_params{CASE_DECONV_ELTW_FP32_3, 5, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_FP32_4, 5, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_FP32_5, 5, 7}, - conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 5, 7}, + conv_eltw_test_params{CASE_DECONV_ELTW_FP32_6, 5, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_FP32_7, 5, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_FP32_8, 5, 7}, - + conv_eltw_test_params{CASE_DECONV_ELTW_i8_1, 2, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_i8_2, 2, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_i8_3, 2, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_i8_4, 2, 7}, conv_eltw_test_params{CASE_DECONV_ELTW_i8_5, 2, 7}, - + }), ); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lrn_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lrn_gpu_test.cpp new file mode 100644 index 00000000000..38579ffda3b --- /dev/null +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lrn_gpu_test.cpp @@ -0,0 +1,218 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +#include +#include +#include +#include + +#include + +using namespace cldnn; +using namespace ::tests; + +TEST(lrn_fp32_gpu, basic) { + // input : 1x16x1x1 + // Output : 1x16x1x1 + const auto& engine = get_test_engine(); + + const size_t b = 1; + const size_t f = 16; + const size_t y = 1; + const size_t x = 1; + + auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + std::vector inputVals(b * f * y * x); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 0; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + uint32_t size = 2; + float k = 0.5f; + float alpha = 9.9e-05f; + float beta = 1.f; + topology.add(lrn("lrn", "input", size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); + + network network(engine, topology); + + network.set_input_data("input", input); + + auto outputs = network.execute(); + + auto output = outputs.at("lrn").get_memory(); + auto output_ptr = output.pointer(); + + std::vector expected_results = { + 0.f, 1.99901f, 3.99486f, 5.98519f, + 7.96766f, 9.93997f, 11.8999f, 13.8451f, + 15.7736f, 17.6831f, 19.5718f, 21.4376f, + 23.2787f, 25.0933f, 26.8797f, 29.3463f + }; + + ASSERT_EQ(output_ptr.size(), expected_results.size()); + for (size_t i = 0; i < expected_results.size(); ++i) { + EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i; + } +} + +TEST(lrn_fp32_gpu, basic2) { + // input : 1x16x1x1 + // Output : 1x16x1x1 + const auto& engine = get_test_engine(); + + const size_t b = 1; + const size_t f = 16; + const size_t y = 1; + const size_t x = 1; + + auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + std::vector inputVals(b * f * y * x); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 0; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + uint32_t size = 5; + float k = 0.5f; + float alpha = 9.9e-05f; + float beta = 1.f; + topology.add(lrn("lrn", "input", size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); + + network network(engine, topology); + + network.set_input_data("input", input); + + auto outputs = network.execute(); + + auto output = outputs.at("lrn").get_memory(); + auto output_ptr = output.pointer(); + + std::vector expected_results = { + 0.f, 1.99889f, 3.99525f, 5.98696f, + 7.97159f, 9.94682f, 11.9104f, 13.86f, + 15.7936f, 17.709f, 19.6041f, 21.4769f, + 23.3257f, 25.1485f, 27.2091f, 29.3151f + }; + + ASSERT_EQ(output_ptr.size(), expected_results.size()); + for (size_t i = 0; i < expected_results.size(); ++i) { + EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i; + } +} + +TEST(lrn_fp32_gpu, basic3) { + // input : 2x16x4x4 + // Output : 2x16x4x4 + const auto& engine = get_test_engine(); + + const size_t b = 2; + const size_t f = 16; + const size_t y = 4; + const size_t x = 4; + + auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + std::vector inputVals(b * f * y * x); + std::generate(inputVals.begin(), inputVals.end(), []() { + static float n = 0; + return n++; + }); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("input", input.get_layout())); + uint32_t size = 5; + float k = 1.f; + float alpha = 9.89999971e-05f; + float beta = 0.75f; + topology.add(lrn("lrn", "input", size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); + + network network(engine, topology); + + network.set_input_data("input", input); + + auto outputs = network.execute(); + + auto output = outputs.at("lrn").get_memory(); + auto output_ptr = output.pointer(); + + std::vector expected_results = { + 0.f, 0.999792f, 1.99911f, 2.99755f, 3.99466f, 4.99f, 5.98313f, 6.97361f, 7.96102f, 8.94493f, 9.92493f, + 10.9006f, 11.8715f, 12.8374f, 13.8493f, 14.8699f, 15.7966f, 16.696f, 17.5763f, 18.5035f, 19.4231f, 20.3347f, + 21.2381f, 22.133f, 23.019f, 23.8959f, 24.7635f, 25.6216f, 26.4698f, 27.308f, 28.5352f, 29.8116f, 30.5296f, + 30.9563f, 31.342f, 32.1164f, 32.8795f, 33.6315f, 34.372f, 35.1011f, 35.8188f, 36.5248f, 37.2192f, 37.9019f, + 38.573f, 39.2323f, 41.0464f, 43.0053f, 43.4314f, 42.8943f, 42.3534f, 42.9426f, 43.5202f, 44.0863f, 44.641f, + 45.1843f, 45.7162f, 46.2369f, 46.7464f, 47.2449f, 47.7324f, 48.209f, 50.9356f, 53.9937f, 54.1048f, 52.193f, + 50.4329f, 50.8466f, 51.2503f, 51.6441f, 52.0281f, 52.4024f, 52.7672f, 53.1226f, 53.4688f, 53.8059f, 54.1339f, + 54.4532f, 58.2539f, 62.6803f, 62.5006f, 58.9907f, 55.9219f, 56.1912f, 56.4526f, 56.7063f, 56.9524f, 57.1911f, + 57.4225f, 57.6468f, 57.864f, 58.0744f, 58.278f, 58.4751f, 63.3482f, 69.2298f, 68.8098f, 63.6702f, 59.3655f, + 59.5255f, 59.6799f, 59.8286f, 59.9718f, 60.1096f, 60.2422f, 60.3697f, 60.4921f, 60.6096f, 60.7223f, 60.8303f, + 66.6672f, 73.9448f, 73.3429f, 66.6814f, 61.3032f, 61.3851f, 61.463f, 61.537f, 61.6071f, 61.6735f, 61.7363f, + 61.7954f, 61.8511f, 61.9035f, 61.9524f, 61.9982f, 68.6425f, 77.1679f, 76.4387f, 68.44f, 62.181f, 62.2089f, + 62.2341f, 62.2566f, 62.2765f, 62.2939f, 62.3088f, 62.3213f, 62.3314f, 62.3393f, 62.345f, 62.3484f, 69.6361f, + 79.2224f, 78.4118f, 69.288f, 62.335f, 62.3266f, 62.3163f, 62.3042f, 62.2905f, 62.275f, 62.258f, 62.2393f, + 62.2191f, 62.1974f, 62.1742f, 62.1496f, 69.9303f, 80.3855f, 79.5293f, 69.4891f, 62.0065f, 61.9741f, 61.9405f, + 61.9058f, 61.87f, 61.8331f, 61.7951f, 61.7561f, 61.7162f, 61.6752f, 61.6333f, 61.5906f, 69.7361f, 80.882f, + 80.0067f, 69.2391f, 61.3639f, 61.3161f, 61.2677f, 61.2185f, 61.1686f, 61.1181f, 61.0669f, 61.015f, 60.9625f, + 60.9095f, 60.8559f, 60.8017f, 69.207f, 80.8876f, 80.0121f, 68.6802f, 60.523f, 60.4658f, 60.4082f, 60.3501f, + 60.2917f, 60.2328f, 60.1736f, 60.1139f, 60.054f, 59.9937f, 59.933f, 59.872f, 68.4534f, 80.5369f, 79.6741f, + 67.9147f, 59.5628f, 59.5002f, 59.4373f, 59.3742f, 59.3109f, 59.2473f, 59.1836f, 59.1197f, 59.0555f, 58.9912f, + 58.9268f, 58.8622f, 67.5537f, 79.9313f, 79.0897f, 67.0151f, 58.537f, 58.4716f, 58.4061f, 58.3405f, 58.2748f, + 58.209f, 58.1431f, 58.0772f, 58.0112f, 57.9451f, 57.879f, 57.8128f, 66.5636f, 79.1467f, 78.3317f, 66.0328f, + 57.4815f, 57.4151f, 57.3488f, 57.2824f, 57.216f, 57.1496f, 57.0832f, 57.0169f, 56.9505f, 56.8842f, 56.8178f, + 56.7515f, 65.5222f, 78.2394f, 77.4541f, 65.0043f, 56.4205f, 56.3544f, 56.2884f, 56.2224f, 56.1564f, 56.0905f, + 56.0247f, 55.9589f, 55.8932f, 55.8275f, 55.762f, 55.6965f, 64.4569f, 77.2513f, 76.4972f, 63.955f, 55.37f, + 55.305f, 55.24f, 55.1751f, 55.1104f, 55.0457f, 54.9811f, 54.9165f, 54.8521f, 54.7878f, 54.7236f, 54.6595f, + 63.3869f, 76.2131f, 75.4906f, 62.9026f, 54.3404f, 54.2769f, 54.2135f, 54.1503f, 54.0871f, 54.0241f, 53.9611f, + 53.8983f, 53.8356f, 53.773f, 53.7105f, 53.6482f, 62.3253f, 75.1476f, 74.4564f, 61.8595f, 53.3382f, 53.2766f, + 53.2151f, 53.1537f, 53.0924f, 53.0313f, 52.9703f, 52.9094f, 52.8487f, 52.788f, 52.7275f, 52.6672f, 61.2811f, + 74.0714f, 73.4107f, 60.834f, 52.3673f, 52.3077f, 52.2482f, 52.1889f, 52.1297f, 52.0706f, 52.0117f, 51.9529f, + 51.8942f, 51.8357f, 51.7773f, 51.719f, 60.2603f, 72.9966f, 72.3652f, 59.8317f, 51.4297f, 51.3722f, 51.3149f, + 51.2577f, 51.2006f, 51.1437f, 51.0869f, 51.0302f, 50.9737f, 50.9173f, 50.861f, 50.8049f, 59.2667f, 71.9317f, + 71.3285f, 58.8561f, 50.5263f, 50.4709f, 50.4157f, 50.3607f, 50.3058f, 50.251f, 50.1963f, 50.1418f, 50.0874f, + 50.0331f, 49.979f, 49.925f, 58.3025f, 70.8832f, 70.3067f, 57.9093f, 49.657f, 49.6038f, 49.5508f, 49.4978f, + 49.445f, 49.3923f, 49.3398f, 49.2873f, 49.2351f, 49.1829f, 49.1309f, 49.079f, 57.3689f, 69.8552f, 69.3041f, + 56.9924f, 48.8214f, 48.7703f, 48.7193f, 48.6684f, 48.6177f, 48.567f, 48.5165f, 48.4662f, 48.4159f, 48.3658f, + 48.3158f, 48.266f, 56.4664f, 68.8508f, 68.3236f, 56.1058f, 48.0185f, 47.9694f, 47.9204f, 47.8715f, 47.8228f, + 47.7741f, 47.7256f, 47.6772f, 47.629f, 47.5808f, 47.5328f, 47.4849f, 55.5949f, 67.8717f, 67.3672f, 55.2492f, + 47.2472f, 47.2f, 47.1529f, 47.106f, 47.0591f, 47.0124f, 46.9658f, 46.9193f, 46.8729f, 46.8267f, 46.7806f, + 46.7345f, 54.7537f, 66.9191f, 66.436f, 54.4224f, 46.5061f, 46.4608f, 46.4156f, 46.3705f, 46.3254f, 46.2806f, + 46.2358f, 46.1911f, 46.1466f, 46.1021f, 46.0578f, 46.0135f, 53.9422f, 65.9936f, 65.5306f, 53.6244f, 45.7941f, + 45.7505f, 45.707f, 45.6636f, 45.6204f, 45.5772f, 45.5342f, 45.4913f, 45.4484f, 45.4057f, 45.3631f, 45.3206f, + 53.1594f, 65.0953f, 64.6512f, 52.8544f, 45.1096f, 45.0677f, 45.0259f, 44.9842f, 44.9426f, 44.9011f, 44.8597f, + 44.8184f, 44.7773f, 44.7362f, 44.6952f, 44.6543f, 52.4043f, 64.2239f, 63.7976f, 52.1113f, 44.4513f, 44.411f, + 44.3708f, 44.3307f, 44.2907f, 44.2508f, 44.211f, 44.1713f, 44.1317f, 44.0921f, 44.0527f, 44.0134f, 51.6758f, + 63.379f, 62.9695f, 51.3943f, 43.8181f, 43.7793f, 43.7406f, 43.702f, 43.6635f, 43.6251f, 43.5867f, 43.5485f, + 43.5104f, 43.4723f, 43.4343f, 43.3965f, 50.9729f, 62.56f + }; + + ASSERT_EQ(output_ptr.size(), expected_results.size()); + for (size_t i = 0; i < expected_results.size(); ++i) { + EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i; + } +}