From 61389b5e510f634943b2c68713cdae0ca67665e2 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Thu, 11 Mar 2021 12:13:11 +0300 Subject: [PATCH] Fix for compilation issue for ARM 32-bit (#4717) --- .../ie_preprocess_gapi_kernels_neon.cpp | 8 ++--- .../thirdparty/ocv/opencv_hal_neon.hpp | 35 ++++++++----------- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp index 0ee824c8748..b4dcb20213b 100644 --- a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp +++ b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp @@ -257,8 +257,8 @@ CV_ALWAYS_INLINE void horizontal_4LPI(std::array, chanNu v_uint8 q2 = v_shuffle(q0, hmask); v_uint8 q3 = v_shuffle(q1, hmask); - v_uint8 q4 = v_blend<0xCC /*0b11001100*/>(q2, v_slli_si128(q3, 4)); - v_uint8 q5 = v_blend<0xCC /*0b11001100*/>(v_srli_si128(q2, 4), q3); + v_uint8 q4 = v_blend<0xCC /*0b11001100*/>(q2, v_shift_left<4>(q3)); + v_uint8 q5 = v_blend<0xCC /*0b11001100*/>(v_shift_right<4>(q2), q3); v_store_low(&dst[c][0][x], q4); v_store_high(&dst[c][1][x], q4); @@ -334,8 +334,8 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_(std::array v_uint8 q0 = v_pack_u(r0, r1); v_uint8 q1 = v_pack_u(r2, r3); - v_uint8 q2 = v_blend<0xCC /*0b11001100*/>(q0, v_slli_si128(q1, 4)); - v_uint8 q3 = v_blend<0xCC /*0b11001100*/>(v_srli_si128(q0, 4), q1); + v_uint8 q2 = v_blend<0xCC /*0b11001100*/>(q0, v_shift_left<4>(q1)); + v_uint8 q3 = v_blend<0xCC /*0b11001100*/>(v_shift_right<4>(q0), q1); v_uint8 q4 = v_shuffle(q2, vmask); v_uint8 q5 = v_shuffle(q3, vmask); diff --git a/inference-engine/thirdparty/ocv/opencv_hal_neon.hpp b/inference-engine/thirdparty/ocv/opencv_hal_neon.hpp index d37d377b012..0427423d788 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_neon.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_neon.hpp @@ -46,6 +46,7 @@ #define OPENCV_HAL_INTRIN_NEON_HPP #include +#include namespace cv @@ -2447,34 +2448,28 @@ CV_ALWAYS_INLINE v_uint8x16 v_shuffle(const v_uint8x16& a, const v_uint8x16& mas #endif } -CV_ALWAYS_INLINE v_uint8x16 v_slli_si128(const v_uint8x16& a, const int imm) +template +CV_ALWAYS_INLINE v_uint8x16 v_slli_si128(const v_uint8x16& a) { - uint8x16_t ret = {}; - if (imm <= 0) { - ret = a.val; - } - if (imm > 15) { - ret = vdupq_n_u8(0); - } else { - ret = vextq_u8(vdupq_n_u8(0), a.val, 16 - (imm)); - } + assert((shift > 0) && (shift <= 15)); + uint8x16_t ret = vextq_u8(vdupq_n_u8(0), a.val, shift /*16 - (imm)*/); return v_uint8x16(ret); } -CV_ALWAYS_INLINE v_uint8x16 v_srli_si128(const v_uint8x16& a, const int imm) +template +CV_ALWAYS_INLINE v_uint8x16 v_shift_right(const v_uint8x16& a) { - uint8x16_t ret = {}; - if (imm <= 0) { - ret = a.val; - } - if (imm > 15) { - ret = vdupq_n_u8(0); - } else { - ret = vextq_u8(a.val, vdupq_n_u8(0), imm); - } + assert((shift > 0) && (shift <= 15)); + uint8x16_t ret = vextq_u8(a.val, vdupq_n_u8(0), shift); return v_uint8x16(ret); } +template +CV_ALWAYS_INLINE v_uint8x16 v_shift_left(const v_uint8x16& a) +{ + return v_slli_si128<16 - shift>(a); +} + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond