Pre-processing: Resize Linear U8C3/C4 refactoring (#6356)
* Resize Linear U8 C3C4toPlanes refactoring * Fix alignment and remove unused code * Delete extern template specializations
This commit is contained in:
parent
f0abf9ebe3
commit
27441c2310
@ -274,20 +274,23 @@ CV_ALWAYS_INLINE void horizontal_4LPI(std::array<std::array<uint8_t*, 4>, chanNu
|
||||
}
|
||||
|
||||
template<int chanNum>
|
||||
CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum>& dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
CV_ALWAYS_INLINE bool calcRowLinear_8UC_Impl(neon_tag,
|
||||
std::array<std::array<uint8_t*, 4>, chanNum>& dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
static_assert(v_uint8::nlanes == 16,
|
||||
"The wide of NEON vector is 128 bits, so one vector contains 16 uchars");
|
||||
constexpr int nlanes = static_cast<int>(v_uint8::nlanes);
|
||||
if ((inSz.width * chanNum < nlanes) || (outSz.width < nlanes))
|
||||
return false;
|
||||
|
||||
bool xRatioEq = inSz.width == outSz.width;
|
||||
bool yRatioEq = inSz.height == outSz.height;
|
||||
@ -297,7 +300,7 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>
|
||||
1, 5, 9, 13, 3, 7, 11, 15 };
|
||||
if (4 == lpi) {
|
||||
// vertical pass
|
||||
neon::vertical_4LPI(src0, src1, tmp, beta, inSz.width * chanNum);
|
||||
vertical_4LPI(src0, src1, tmp, beta, inSz.width * chanNum);
|
||||
|
||||
// horizontal pass
|
||||
horizontal_4LPI<chanNum>(dst, tmp, mapsx, _mask_horizontal, clone, outSz.width);
|
||||
@ -378,38 +381,7 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>
|
||||
channels2planes_store<chanNum>(dst, src, outSz.width, l);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3>& dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
calcRowLinear_8UC_Impl_<3>(dst, src0, src1, alpha, clone, mapsx,
|
||||
beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4>& dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
calcRowLinear_8UC_Impl_<4>(dst, src0, src1, alpha, clone, mapsx,
|
||||
beta, tmp, inSz, outSz, lpi);
|
||||
return true;
|
||||
}
|
||||
|
||||
CV_ALWAYS_INLINE void horizontal_4LPI(uint8_t* dst[],
|
||||
@ -506,6 +478,43 @@ CV_ALWAYS_INLINE void horizontal_anyLPI(uint8_t* dst,
|
||||
}
|
||||
} // namespace neon
|
||||
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<neon_tag, 3>(neon_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 3;
|
||||
return neon::calcRowLinear_8UC_Impl<chanNum>(neon_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<neon_tag, 4>(neon_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 4;
|
||||
return neon::calcRowLinear_8UC_Impl<chanNum>(neon_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// 8UC1 Resize (bi-linear)
|
||||
template<>
|
||||
bool calcRowLinear8UC1Impl(neon_tag,
|
||||
|
@ -27,47 +27,6 @@ void calcRowArea_8U(uchar dst[], const uchar *src[], const Size &inSz, const Siz
|
||||
void calcRowArea_32F(float dst[], const float *src[], const Size &inSz, const Size &outSz,
|
||||
float yalpha, const MapperUnit32F& ymap, int xmaxdf, const int xindex[],
|
||||
const float xalpha[], float vbuf[]);
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
int lpi);
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
int lpi);
|
||||
|
||||
template<int numChan>
|
||||
void calcRowLinear_8UC(std::array<std::array<uint8_t*, 4>, numChan> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
int lpi) {
|
||||
calcRowLinear_8U(std::integral_constant<int, numChan>{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
} // namespace neon
|
||||
|
||||
template<typename isa_tag_t, typename T>
|
||||
@ -83,7 +42,7 @@ extern template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const ui
|
||||
|
||||
template<typename isa_tag_t>
|
||||
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
extern template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
@ -125,6 +84,13 @@ extern template void calcRowLinear32FC1Impl(neon_tag, float* dst[], const float*
|
||||
const int mapsx[], const float beta[],
|
||||
const Size& inSz, const Size& outSz,
|
||||
const int lpi, const int l);
|
||||
|
||||
template<typename isa_tag_t, int chs>
|
||||
bool calcRowLinear8UC3C4Impl(isa_tag_t, std::array<std::array<uint8_t*, 4>, chs>& dst,
|
||||
const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int l);
|
||||
} // namespace kernels
|
||||
} // namespace gapi
|
||||
} // namespace InferenceEngine
|
||||
|
@ -202,7 +202,8 @@ CV_ALWAYS_INLINE void verticalPass_anylpi_8U(const uint8_t* src0[], const uint8_
|
||||
}
|
||||
|
||||
template<int chanNum>
|
||||
CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
CV_ALWAYS_INLINE bool calcRowLinear_8UC_Impl(avx2_tag,
|
||||
std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
@ -214,6 +215,9 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
constexpr int half_nlanes = (v_uint8::nlanes / 2);
|
||||
if ((inSz.width * chanNum < half_nlanes) || (outSz.width < half_nlanes))
|
||||
return false;
|
||||
|
||||
const int shift = (half_nlanes / 4);
|
||||
|
||||
if (4 == lpi) {
|
||||
@ -286,41 +290,9 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>,
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
constexpr const int chanNum = 3;
|
||||
|
||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
constexpr const int chanNum = 4;
|
||||
|
||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
CV_ALWAYS_INLINE void horizontalPass_lpi4_8UC1(const short clone[], const short mapsx[],
|
||||
uint8_t tmp[], uint8_t* dst[], const int& length) {
|
||||
@ -389,6 +361,43 @@ CV_ALWAYS_INLINE void horizontalPass_anylpi_8U(const short alpha[], const short
|
||||
}
|
||||
} // namespace avx
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<avx2_tag, 3>(avx2_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 3;
|
||||
return avx::calcRowLinear_8UC_Impl<chanNum>(avx2_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<avx2_tag, 4>(avx2_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 4;
|
||||
return avx::calcRowLinear_8UC_Impl<chanNum>(avx2_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// 8UC1 Resize (bi-linear)
|
||||
template<>
|
||||
|
@ -39,48 +39,6 @@ void calcRowArea_CVKL_U8_SSE42(const uchar * src[],
|
||||
int y_max_count,
|
||||
uint16_t vert_sum[]);
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi);
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi);
|
||||
|
||||
template<int numChan>
|
||||
void calcRowLinear_8UC(std::array<std::array<uint8_t*, 4>, numChan> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
calcRowLinear_8U(std::integral_constant<int, numChan>{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
} // namespace avx
|
||||
|
||||
template<typename isa_tag_t, typename T>
|
||||
@ -140,6 +98,13 @@ extern template void calcRowLinear32FC1Impl(avx2_tag, float* dst[], const float*
|
||||
const float alpha[], const int mapsx[],
|
||||
const float beta[], const Size& inSz, const Size& outSz,
|
||||
const int lpi, const int l);
|
||||
|
||||
template<typename isa_tag_t, int chs>
|
||||
bool calcRowLinear8UC3C4Impl(isa_tag_t, std::array<std::array<uint8_t*, 4>, chs>& dst,
|
||||
const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int l);
|
||||
} // namespace kernels
|
||||
} // namespace gapi
|
||||
} // namespace InferenceEngine
|
||||
|
@ -178,7 +178,8 @@ CV_ALWAYS_INLINE void verticalPass_anylpi_8U(const uint8_t* src0[], const uint8_
|
||||
|
||||
// Resize (bi-linear, 8U, generic number of channels)
|
||||
template<int chanNum>
|
||||
CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
CV_ALWAYS_INLINE bool calcRowLinear_8UC_Impl(avx512_tag,
|
||||
std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
@ -188,8 +189,11 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>,
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
const int lpi) {
|
||||
constexpr int half_nlanes = (v_uint8::nlanes / 2);
|
||||
if ((inSz.width * chanNum < half_nlanes) || (outSz.width < half_nlanes))
|
||||
return false;
|
||||
|
||||
constexpr int shift = (half_nlanes / 4);
|
||||
|
||||
if (4 == lpi) {
|
||||
@ -282,40 +286,7 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl(std::array<std::array<uint8_t*, 4>,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
constexpr const int chanNum = 3;
|
||||
|
||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
constexpr const int chanNum = 4;
|
||||
|
||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
return true;
|
||||
}
|
||||
|
||||
CV_ALWAYS_INLINE void horizontalPass_lpi4_U8C1(const short clone[], const short mapsx[],
|
||||
@ -325,13 +296,13 @@ CV_ALWAYS_INLINE void horizontalPass_lpi4_U8C1(const short clone[], const short
|
||||
constexpr int half_nlanes = (v_uint8::nlanes / 2);
|
||||
GAPI_DbgAssert(width >= half_nlanes);
|
||||
|
||||
v_uint8 shuf_mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13,
|
||||
v_uint8 shuf_mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13,
|
||||
2, 3, 6, 7, 10, 11, 14, 15,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
2, 3, 6, 7, 10, 11, 14, 15,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
2, 3, 6, 7, 10, 11, 14, 15,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
2, 3, 6, 7, 10, 11, 14, 15);
|
||||
|
||||
v_uint32 permute_idxs1 = v_set_s32(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
|
||||
@ -397,6 +368,44 @@ CV_ALWAYS_INLINE void horizontalPass_anylpi_8U(const short alpha[], const short
|
||||
}
|
||||
} // namespace avx512
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<avx512_tag, 3>(avx512_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 3;
|
||||
return avx512::calcRowLinear_8UC_Impl<chanNum>(avx512_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<avx512_tag, 4>(avx512_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 4;
|
||||
return avx512::calcRowLinear_8UC_Impl<chanNum>(avx512_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// 8UC1 Resize (bi-linear)
|
||||
template<>
|
||||
bool calcRowLinear8UC1Impl(avx512_tag,
|
||||
|
@ -39,49 +39,6 @@ void calcRowArea_CVKL_U8(const uchar * src[],
|
||||
int y_max_count,
|
||||
uint16_t vert_sum[]);
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi);
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi);
|
||||
|
||||
template<int numChan>
|
||||
void calcRowLinear_8UC(std::array<std::array<uint8_t*, 4>, numChan> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
calcRowLinear_8U(std::integral_constant<int, numChan>{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
} // namespace avx512
|
||||
|
||||
template<typename isa_tag_t, typename T>
|
||||
@ -97,7 +54,7 @@ extern template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const
|
||||
|
||||
template<typename isa_tag_t>
|
||||
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
extern template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
@ -138,6 +95,13 @@ extern template void calcRowLinear32FC1Impl(avx512_tag, float* dst[], const floa
|
||||
const float alpha[], const int mapsx[],
|
||||
const float beta[], const Size& inSz, const Size& outSz,
|
||||
const int lpi, const int l);
|
||||
|
||||
template<typename isa_tag_t, int chs>
|
||||
bool calcRowLinear8UC3C4Impl(isa_tag_t, std::array<std::array<uint8_t*, 4>, chs>& dst,
|
||||
const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int l);
|
||||
} // namespace kernels
|
||||
} // namespace gapi
|
||||
} // namespace InferenceEngine
|
||||
|
@ -66,10 +66,10 @@ bool calcRowLinear8UC1Impl(sse42_tag,
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int) {
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int) {
|
||||
constexpr int nlanes = v_uint8::nlanes;
|
||||
constexpr int half_nlanes = (v_uint8::nlanes / 2);
|
||||
|
||||
@ -520,18 +520,21 @@ bool calcRowLinear8UC1Impl(sse42_tag,
|
||||
#if 1
|
||||
// Resize (bi-linear, 8U, generic number of channels)
|
||||
template<int chanNum>
|
||||
void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
CV_ALWAYS_INLINE bool calcRowLinear_8UC_Impl_(sse42_tag,
|
||||
std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
const int half_nlanes = (v_uint8::nlanes / 2);
|
||||
if ((inSz.width < half_nlanes) || (outSz.width < half_nlanes))
|
||||
return false;
|
||||
|
||||
if (4 == lpi) {
|
||||
// vertical pass
|
||||
@ -736,6 +739,7 @@ void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
// Resize 3C/4C universal intrinsic implementation for SSE42 version is a bit slower sometimes.
|
||||
@ -748,7 +752,7 @@ void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
@ -806,6 +810,8 @@ void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
}
|
||||
|
||||
// horizontal pass
|
||||
v_uint8 val_0, val_1, val_2, val_3;
|
||||
int shift = (half_nlanes / 4);
|
||||
GAPI_DbgAssert(outSz.width >= half_nlanes);
|
||||
for (int x = 0; x < outSz.width; ) {
|
||||
for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) {
|
||||
@ -814,14 +820,7 @@ void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
v_int16 a54 = vx_load(&clone[4 * (x + 4)]);
|
||||
v_int16 a76 = vx_load(&clone[4 * (x + 6)]);
|
||||
|
||||
v_uint8 val_0 = vx_setzero_u8();
|
||||
v_uint8 val_1 = vx_setzero_u8();
|
||||
v_uint8 val_2 = vx_setzero_u8();
|
||||
v_uint8 val_3 = vx_setzero_u8();
|
||||
|
||||
for (int c = 0; c < chanNum; ++c) {
|
||||
int shift = (half_nlanes / 4);
|
||||
|
||||
v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0);
|
||||
v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift);
|
||||
v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2);
|
||||
@ -913,35 +912,41 @@ void calcRowLinear_8UC_Impl_(std::array<std::array<uint8_t*, 4>, chanNum> &dst,
|
||||
#endif
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<sse42_tag, 3>(sse42_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 3;
|
||||
calcRowLinear_8UC_Impl_<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
return calcRowLinear_8UC_Impl_<chanNum>(sse42_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi) {
|
||||
template<>
|
||||
bool calcRowLinear8UC3C4Impl<sse42_tag, 4>(sse42_tag,
|
||||
std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int ) {
|
||||
constexpr int chanNum = 4;
|
||||
calcRowLinear_8UC_Impl_<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
return calcRowLinear_8UC_Impl_<chanNum>(sse42_tag{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -40,88 +40,23 @@ void calcRowArea_CVKL_U8_SSE42(const uchar * src[],
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
// Resize (bi-linear, 8UC3)
|
||||
void calcRowLinear_8U(C3, std::array<std::array<uint8_t*, 4>, 3> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi);
|
||||
|
||||
// Resize (bi-linear, 8UC4)
|
||||
void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi);
|
||||
|
||||
template<int numChan>
|
||||
void calcRowLinear_8UC(std::array<std::array<uint8_t*, 4>, numChan> &dst,
|
||||
const uint8_t *src0[],
|
||||
const uint8_t *src1[],
|
||||
const short alpha[],
|
||||
const short clone[],
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size &inSz,
|
||||
const Size &outSz,
|
||||
int lpi) {
|
||||
calcRowLinear_8U(std::integral_constant<int, numChan>{}, dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||
}
|
||||
|
||||
template<typename isa_tag_t, typename T>
|
||||
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs,
|
||||
T* out, const int length);
|
||||
|
||||
extern template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan,
|
||||
const int chs, uint8_t* out, const int length);
|
||||
extern template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan,
|
||||
const int chs, float* out, const int length);
|
||||
template<typename isa_tag_t>
|
||||
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
extern template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
template<typename isa_tag_t>
|
||||
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
extern template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||
|
||||
template<typename isa_tag_t, typename T, int chs>
|
||||
void splitRowImpl(isa_tag_t, const T* in, std::array<T*, chs>& outs, const int length);
|
||||
|
||||
extern template void splitRowImpl<sse42_tag, uint8_t, 2>(sse42_tag, const uint8_t* in, std::array<uint8_t*, 2>& outs, const int length);
|
||||
extern template void splitRowImpl<sse42_tag, float, 2>(sse42_tag, const float* in, std::array<float*, 2>& outs, const int length);
|
||||
extern template void splitRowImpl<sse42_tag, uint8_t, 3>(sse42_tag, const uint8_t* in, std::array<uint8_t*, 3>& outs, const int length);
|
||||
extern template void splitRowImpl<sse42_tag, float, 3>(sse42_tag, const float* in, std::array<float*, 3>& outs, const int length);
|
||||
extern template void splitRowImpl<sse42_tag, uint8_t, 4>(sse42_tag, const uint8_t* in, std::array<uint8_t*, 4>& outs, const int length);
|
||||
extern template void splitRowImpl<sse42_tag, float, 4>(sse42_tag, const float* in, std::array<float*, 4>& outs, const int length);
|
||||
|
||||
template<typename isa_tag_t, typename T, int chs>
|
||||
void mergeRowImpl(isa_tag_t, const std::array<const T*, chs>& ins, T* out, const int length);
|
||||
|
||||
extern template void mergeRowImpl<sse42_tag, uint8_t, 2>(sse42_tag, const std::array<const uint8_t*, 2>& ins, uint8_t* out, const int length);
|
||||
extern template void mergeRowImpl<sse42_tag, float, 2>(sse42_tag, const std::array<const float*, 2>& ins, float* out, const int length);
|
||||
extern template void mergeRowImpl<sse42_tag, uint8_t, 3>(sse42_tag, const std::array<const uint8_t*, 3>& ins, uint8_t* out, const int length);
|
||||
extern template void mergeRowImpl<sse42_tag, float, 3>(sse42_tag, const std::array<const float*, 3>& ins, float* out, const int length);
|
||||
extern template void mergeRowImpl<sse42_tag, uint8_t, 4>(sse42_tag, const std::array<const uint8_t*, 4>& ins, uint8_t* out, const int length);
|
||||
extern template void mergeRowImpl<sse42_tag, float, 4>(sse42_tag, const std::array<const float*, 4>& ins, float* out, const int length);
|
||||
|
||||
template<typename isa_tag_t>
|
||||
bool calcRowLinear8UC1Impl(isa_tag_t, uint8_t* dst[], const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
@ -134,10 +69,12 @@ void calcRowLinear32FC1Impl(isa_tag_t, float* dst[], const float* src0[], const
|
||||
const float beta[], const Size& inSz, const Size& outSz,
|
||||
const int lpi, const int l);
|
||||
|
||||
extern template void calcRowLinear32FC1Impl(sse42_tag, float* dst[], const float* src0[], const float* src1[],
|
||||
const float alpha[], const int mapsx[],
|
||||
const float beta[], const Size& inSz, const Size& outSz,
|
||||
const int lpi, const int l);
|
||||
template<typename isa_tag_t, int chs>
|
||||
bool calcRowLinear8UC3C4Impl(isa_tag_t, std::array<std::array<uint8_t*, 4>, chs>& dst,
|
||||
const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int l);
|
||||
} // namespace kernels
|
||||
} // namespace gapi
|
||||
} // namespace InferenceEngine
|
||||
|
@ -42,175 +42,52 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace gapi {
|
||||
|
||||
//using namespace kernels;
|
||||
|
||||
namespace kernels {
|
||||
|
||||
using isas_set = typelist<
|
||||
#ifdef HAVE_AVX512
|
||||
avx512_tag,
|
||||
#endif
|
||||
#ifdef HAVE_AVX2
|
||||
avx2_tag,
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
sse42_tag,
|
||||
#endif
|
||||
#ifdef HAVE_NEON
|
||||
neon_tag,
|
||||
#endif
|
||||
//scalar "ISA" have to be the last one in the list,
|
||||
//as the search for supported ISA is performed until first match
|
||||
scalar_tag>;
|
||||
#ifdef HAVE_AVX512
|
||||
bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); }
|
||||
#endif // HAVE_AVX512
|
||||
|
||||
#ifdef HAVE_AVX2
|
||||
bool is_present(avx2_tag) { return with_cpu_x86_avx2(); }
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
bool is_present(sse42_tag) { return with_cpu_x86_sse42(); }
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
bool is_present(neon_tag) { return true; }
|
||||
#endif // HAVE_NEON
|
||||
|
||||
//scalar version of kernels is always available
|
||||
bool is_present(scalar_tag) { return true; }
|
||||
|
||||
struct is_isa_present {
|
||||
template< typename isa_tag_t>
|
||||
bool operator()(type_to_type<isa_tag_t>) {
|
||||
return is_present(isa_tag_t{});
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
struct fp_16_t {
|
||||
int16_t v;
|
||||
};
|
||||
|
||||
|
||||
template<typename type>
|
||||
struct cv_type_to_depth;
|
||||
|
||||
template<> struct cv_type_to_depth<std::uint8_t> { enum { depth = CV_8U }; };
|
||||
template<> struct cv_type_to_depth<std::int8_t> { enum { depth = CV_8S }; };
|
||||
template<> struct cv_type_to_depth<std::uint16_t> { enum { depth = CV_16U }; };
|
||||
template<> struct cv_type_to_depth<std::int16_t> { enum { depth = CV_16S }; };
|
||||
template<> struct cv_type_to_depth<std::int32_t> { enum { depth = CV_32S }; };
|
||||
template<> struct cv_type_to_depth<float> { enum { depth = CV_32F }; };
|
||||
template<> struct cv_type_to_depth<fp_16_t> { enum { depth = CV_16F }; };
|
||||
|
||||
template<typename ... types>
|
||||
struct typelist {};
|
||||
|
||||
template<typename type_list>
|
||||
struct head;
|
||||
|
||||
template<template<typename ...> class list, typename head_t, typename ... types>
|
||||
struct head<list<head_t, types...>> { using type = head_t;};
|
||||
|
||||
template<typename typelist>
|
||||
using head_t = typename head<typelist>::type;
|
||||
|
||||
template<typename type>
|
||||
struct type_to_type {};
|
||||
|
||||
template <typename typelist>
|
||||
struct type_dispatch_impl;
|
||||
|
||||
//FIXME: add test for type_dispatch
|
||||
template <template<typename ...> class typelist, typename... type>
|
||||
struct type_dispatch_impl<typelist<type...>> {
|
||||
template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
|
||||
static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
|
||||
result_t res = default_value;
|
||||
|
||||
bool matched = false;
|
||||
std::initializer_list<int> ({
|
||||
!matched && (type_id == type_to_id(type_to_type<type>{})) ?
|
||||
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||
: 0
|
||||
...
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename result_t, typename default_t, typename pred_t, typename type_to_value_t>
|
||||
static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) {
|
||||
result_t res = default_value;
|
||||
|
||||
bool matched = false;
|
||||
std::initializer_list<int> ({
|
||||
!matched && pred(type_to_type<type>{}) ?
|
||||
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||
: 0
|
||||
...
|
||||
});
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename left_typelsist, typename right_typelsist>
|
||||
struct concat;
|
||||
|
||||
template<typename left_typelsist, typename right_typelsist>
|
||||
using concat_t = typename concat<left_typelsist, right_typelsist>::type;
|
||||
|
||||
template<template<typename ...> class left_list, typename ... left_types, template<typename ...> class right_list, typename ... right_types>
|
||||
struct concat<left_list<left_types...>, right_list<right_types...>>{
|
||||
using type = left_list<left_types... , right_types...>;
|
||||
};
|
||||
|
||||
template< class T, class U >
|
||||
using is_same_t = typename std::is_same<T, U>::type;
|
||||
|
||||
template<bool C, class T, class E> struct if_c_impl;
|
||||
|
||||
template<class T, class E> struct if_c_impl<true, T, E> {
|
||||
using type = T;
|
||||
};
|
||||
|
||||
template<class T, class E> struct if_c_impl<false, T, E> {
|
||||
using type = E;
|
||||
};
|
||||
|
||||
template<bool C, class T, class E>
|
||||
using if_c = typename if_c_impl<C, T, E>::type;
|
||||
|
||||
template<class C, class T, class E>
|
||||
using if_ = typename if_c_impl<C::value != 0, T, E>::type;
|
||||
|
||||
template<typename typelist, typename type>
|
||||
struct remove;
|
||||
|
||||
template<typename typelist, typename type>
|
||||
using remove_t = typename remove<typelist, type>::type;
|
||||
|
||||
|
||||
template<template<typename ...> class list, typename head_t, typename ... types, typename t>
|
||||
struct remove<list<head_t, types...>, t> {
|
||||
using type = concat_t<
|
||||
if_<is_same_t<head_t, t>, list<>, list<head_t>>,
|
||||
remove_t<list<types...>, t>
|
||||
>;
|
||||
};
|
||||
|
||||
template<template<typename ...> class list, typename t>
|
||||
struct remove<list<>, t> {
|
||||
using type = list<>;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename typelist, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t,
|
||||
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))>
|
||||
inline result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value = {}) {
|
||||
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<type_id_t>(type_id),
|
||||
std::forward<type_to_id_t>(type_to_id),
|
||||
std::forward<type_to_value_t>(type_to_value),
|
||||
std::forward<default_t>(default_value));
|
||||
}
|
||||
|
||||
template <typename typelist, typename default_t, typename pred_t, typename type_to_value_t,
|
||||
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))>
|
||||
inline result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) {
|
||||
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<pred_t>(pred),
|
||||
std::forward<type_to_value_t>(type_to_value),
|
||||
std::forward<default_t>(default_value));
|
||||
}
|
||||
namespace {
|
||||
|
||||
struct cv_type_id {
|
||||
template <typename type>
|
||||
const int operator()(type_to_type<type> ) { return cv_type_to_depth<type>::depth;}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename typelist>
|
||||
bool is_cv_type_in_list(const int type_id) {
|
||||
return type_dispatch<typelist>(type_id, cv_type_id{}, [](...){ return true;}, false);
|
||||
}
|
||||
|
||||
|
||||
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlane32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
namespace {
|
||||
using merge_supported_types = typelist<uint8_t, int8_t, uint16_t, int16_t, int32_t, float, fp_16_t>;
|
||||
|
||||
template<typename T, int chs>
|
||||
@ -315,49 +192,6 @@ struct typed_split_row {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
using isas_set = typelist<
|
||||
#ifdef HAVE_AVX512
|
||||
avx512_tag,
|
||||
#endif
|
||||
#ifdef HAVE_AVX2
|
||||
avx2_tag,
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
sse42_tag,
|
||||
#endif
|
||||
#ifdef HAVE_NEON
|
||||
neon_tag,
|
||||
#endif
|
||||
//scalar "ISA" have to be the last one in the list,
|
||||
//as the search for supported ISA is performed until first match
|
||||
scalar_tag>;
|
||||
#ifdef HAVE_AVX512
|
||||
bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); }
|
||||
#endif // HAVE_AVX512
|
||||
|
||||
#ifdef HAVE_AVX2
|
||||
bool is_present(avx2_tag) { return with_cpu_x86_avx2(); }
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
bool is_present(sse42_tag) { return with_cpu_x86_sse42(); }
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
bool is_present(neon_tag) { return true; }
|
||||
#endif // HAVE_NEON
|
||||
|
||||
//scalar version of kernels is always available
|
||||
bool is_present(scalar_tag) { return true; }
|
||||
|
||||
struct is_isa_present {
|
||||
template< typename isa_tag_t>
|
||||
bool operator()(type_to_type<isa_tag_t>) {
|
||||
return is_present(isa_tag_t{});
|
||||
}
|
||||
};
|
||||
|
||||
// GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
|
||||
// static void run(const cv::Mat &in, int chan, cv::Mat &out) {
|
||||
// out.create(in.rows, in.cols, in.depth());
|
||||
@ -806,8 +640,8 @@ struct typed_resizeLinearF32C1 {
|
||||
const float alpha[], const int mapsx[],
|
||||
const float beta[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int length) {
|
||||
calcRowLinear32FC1Impl(isa_tag_t{}, dst, src0, src1, alpha,
|
||||
mapsx, beta, inSz, outSz, lpi, length);
|
||||
calcRowLinear32FC1Impl(isa_tag_t{}, dst, src0, src1, alpha,
|
||||
mapsx, beta, inSz, outSz, lpi, length);
|
||||
};
|
||||
}
|
||||
|
||||
@ -818,8 +652,87 @@ struct typed_resizeLinearF32C1 {
|
||||
const float alpha[], const int mapsx[],
|
||||
const float beta[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int length) {
|
||||
calcRowLinear32FC1Impl<Mapper>(isa_tag_t{}, dst, src0, src1, alpha,
|
||||
mapsx, beta, inSz, outSz, lpi, length);
|
||||
calcRowLinear32FC1Impl<Mapper>(isa_tag_t{}, dst, src0, src1, alpha,
|
||||
mapsx, beta, inSz, outSz, lpi, length);
|
||||
};
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace {
|
||||
|
||||
using resizeLinearU8C3C4_suptypes = typelist<uint8_t>;
|
||||
|
||||
template<class Mapper, int chs>
|
||||
inline void calcRowLinear8UC3C4Impl(scalar_tag,
|
||||
std::array<std::array<uint8_t*, 4>, chs>& dst,
|
||||
const uint8_t* src0[],
|
||||
const uint8_t* src1[],
|
||||
const short alpha[],
|
||||
const short clone[], // 4 clones of alpha
|
||||
const short mapsx[],
|
||||
const short beta[],
|
||||
uint8_t tmp[],
|
||||
const Size& inSz,
|
||||
const Size& outSz,
|
||||
const int lpi,
|
||||
const int length) {
|
||||
using alpha_type = typename Mapper::alpha_type;
|
||||
for (int l = 0; l < lpi; l++) {
|
||||
constexpr static const auto unity = Mapper::unity;
|
||||
|
||||
auto beta0 = beta[l];
|
||||
auto beta1 = saturate_cast<alpha_type>(unity - beta[l]);
|
||||
|
||||
for (int x = 0; x < length; x++) {
|
||||
auto alpha0 = alpha[x];
|
||||
auto alpha1 = saturate_cast<alpha_type>(unity - alpha[x]);
|
||||
auto sx0 = mapsx[x];
|
||||
auto sx1 = sx0 + 1;
|
||||
|
||||
for (int c = 0; c < chs; c++) {
|
||||
auto idx0 = chs * sx0 + c;
|
||||
auto idx1 = chs * sx1 + c;
|
||||
uint8_t tmp0 = calc(beta0, src0[l][idx0], beta1, src1[l][idx0]);
|
||||
uint8_t tmp1 = calc(beta0, src0[l][idx1], beta1, src1[l][idx1]);
|
||||
dst[c][l][x] = calc(alpha0, tmp0, alpha1, tmp1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename isa_tag_t, class Mapper, int chs>
|
||||
struct typed_resizeLinearU8C3C4 {
|
||||
using p_f = void (*)(std::array<std::array<uint8_t*, 4>, chs>& dst, const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int length);
|
||||
|
||||
template<typename tag = isa_tag_t>
|
||||
inline typename std::enable_if<!std::is_same<tag, scalar_tag>::value, p_f>::type
|
||||
operator()(type_to_type<uint8_t>) {
|
||||
return [](std::array<std::array<uint8_t*, 4>, chs>& dst, const uint8_t* src0[], const uint8_t* src1[],
|
||||
const short alpha[], const short clone[], const short mapsx[],
|
||||
const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int length) {
|
||||
if (!calcRowLinear8UC3C4Impl<isa_tag_t, chs>(isa_tag_t{}, dst, src0,
|
||||
src1, alpha, clone,
|
||||
mapsx, beta, tmp,
|
||||
inSz, outSz, lpi, length))
|
||||
calcRowLinear8UC3C4Impl<Mapper, chs>(scalar_tag{}, dst, src0, src1, alpha, clone,
|
||||
mapsx, beta, tmp, inSz, outSz, lpi, length);
|
||||
};
|
||||
}
|
||||
|
||||
template<typename tag = isa_tag_t>
|
||||
inline typename std::enable_if<std::is_same<tag, scalar_tag>::value, p_f>::type
|
||||
operator()(type_to_type<uint8_t>) {
|
||||
return [](std::array<std::array<uint8_t*, 4>, chs>& dst, const uint8_t* src0[],
|
||||
const uint8_t* src1[], const short alpha[], const short clone[],
|
||||
const short mapsx[], const short beta[], uint8_t tmp[], const Size& inSz,
|
||||
const Size& outSz, const int lpi, const int length) {
|
||||
calcRowLinear8UC3C4Impl<Mapper, chs>(isa_tag_t{}, dst, src0, src1, alpha, clone,
|
||||
mapsx, beta, tmp, inSz, outSz, lpi, length);
|
||||
};
|
||||
}
|
||||
};
|
||||
@ -1129,6 +1042,107 @@ GAPI_FLUID_KERNEL(FScalePlane32f, ScalePlane32f, true) {
|
||||
resizeLinearF32C1_suptypes>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, class Mapper, int chs>
|
||||
static inline void calcRowLinearC(const cv::gapi::fluid::View& in,
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, chs>& out,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
GAPI_DbgAssert(is_cv_type_in_list<resizeLinearU8C3C4_suptypes>(in.meta().depth));
|
||||
|
||||
auto inSz = in.meta().size;
|
||||
auto outSz = out[0].get().meta().size;
|
||||
|
||||
auto inY = in.y();
|
||||
auto outY = out[0].get().y();
|
||||
auto lpi = out[0].get().lpi();
|
||||
|
||||
GAPI_DbgAssert(outY + lpi <= outSz.height);
|
||||
GAPI_DbgAssert(lpi <= 4);
|
||||
|
||||
linearScratchDesc<T, Mapper, chs> scr(inSz.width, inSz.height, outSz.width, outSz.height, scratch.OutLineB());
|
||||
|
||||
const auto *alpha = scr.alpha;
|
||||
const auto *clone = scr.clone;
|
||||
const auto *mapsx = scr.mapsx;
|
||||
const auto *beta0 = scr.beta;
|
||||
const auto *mapsy = scr.mapsy;
|
||||
auto *tmp = scr.tmp;
|
||||
|
||||
const auto *beta = beta0 + outY;
|
||||
const T *src0[4];
|
||||
const T *src1[4];
|
||||
std::array<std::array<T*, 4>, chs> dst;
|
||||
|
||||
for (int l = 0; l < lpi; l++) {
|
||||
auto index0 = mapsy[outY + l] - inY;
|
||||
auto index1 = mapsy[outSz.height + outY + l] - inY;
|
||||
src0[l] = in.InLine<const T>(index0);
|
||||
src1[l] = in.InLine<const T>(index1);
|
||||
for (int c=0; c < chs; c++) {
|
||||
dst[c][l] = out[c].get().template OutLine<T>(l);
|
||||
}
|
||||
}
|
||||
auto length = out[0].get().length();
|
||||
|
||||
const auto rowFunc = type_dispatch<resizeLinearU8C3C4_suptypes>(in.meta().depth,
|
||||
cv_type_id{},
|
||||
typed_resizeLinearU8C3C4<isa_tag_t, Mapper, chs>{},
|
||||
nullptr);
|
||||
GAPI_DbgAssert(rowFunc);
|
||||
|
||||
rowFunc(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi, length);
|
||||
}
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlanes, ScalePlanes, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
static const auto Kind = cv::GFluidKernel::Kind::Resize;
|
||||
|
||||
static void initScratch(const cv::GMatDesc& in, int, Size,
|
||||
Size outSz, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer &scratch) {
|
||||
initScratchLinear<uchar, linear::Mapper, 3>(in, outSz, scratch, LPI);
|
||||
}
|
||||
|
||||
static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
|
||||
}
|
||||
|
||||
static void run(const cv::gapi::fluid::View& in, int, Size, Size/*sz*/, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer& out1,
|
||||
cv::gapi::fluid::Buffer& out2,
|
||||
cv::gapi::fluid::Buffer& out3,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
constexpr int numChan = 3;
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, numChan> out = {out1, out2, out3};
|
||||
calcRowLinearC<uint8_t, linear::Mapper, numChan>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlanes4, ScalePlanes4, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
static const auto Kind = cv::GFluidKernel::Kind::Resize;
|
||||
|
||||
static void initScratch(const cv::GMatDesc& in, int, Size,
|
||||
Size outSz, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer &scratch) {
|
||||
initScratchLinear<uchar, linear::Mapper, 4>(in, outSz, scratch, LPI);
|
||||
}
|
||||
|
||||
static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
|
||||
}
|
||||
|
||||
static void run(const cv::gapi::fluid::View& in, int, Size, Size/*sz*/, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer& out1,
|
||||
cv::gapi::fluid::Buffer& out2,
|
||||
cv::gapi::fluid::Buffer& out3,
|
||||
cv::gapi::fluid::Buffer& out4,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
constexpr int numChan = 4;
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, numChan> out = {out1, out2, out3, out4};
|
||||
calcRowLinearC<uint8_t, linear::Mapper, numChan>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
namespace {
|
||||
@ -1162,6 +1176,8 @@ struct Split_ResizeISA {
|
||||
pckg.include<typename choose_impl<isa_tag_t>::FSplit4>();
|
||||
pckg.include<typename choose_impl<isa_tag_t>::FScalePlane8u>();
|
||||
pckg.include<typename choose_impl<isa_tag_t>::FScalePlane32f>();
|
||||
pckg.include<typename choose_impl<isa_tag_t>::FScalePlanes>();
|
||||
pckg.include<typename choose_impl<isa_tag_t>::FScalePlanes4>();
|
||||
//at the moment type_dispatch requires something to be returned by the lambda
|
||||
return true;
|
||||
}
|
||||
@ -1185,38 +1201,6 @@ inline cv::gapi::GKernelPackage FKernelsChooseISA() {
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
G_TYPED_KERNEL(UpscalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(UpscalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const Size &sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_COMPOUND_KERNEL(FScalePlane, ScalePlane) {
|
||||
static cv::GMat expand(cv::GMat in, int type, const Size& szIn, const Size& szOut, int interp) {
|
||||
GAPI_DbgAssert(CV_8UC1 == type || CV_32FC1 == type);
|
||||
@ -1307,149 +1291,6 @@ static void calcRowLinear(const cv::gapi::fluid::View & in,
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, class Mapper, int numChan>
|
||||
static void calcRowLinearC(const cv::gapi::fluid::View & in,
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, numChan>& out,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
using alpha_type = typename Mapper::alpha_type;
|
||||
|
||||
auto inSz = in.meta().size;
|
||||
auto outSz = out[0].get().meta().size;
|
||||
|
||||
auto inY = in.y();
|
||||
auto outY = out[0].get().y();
|
||||
auto lpi = out[0].get().lpi();
|
||||
|
||||
GAPI_DbgAssert(outY + lpi <= outSz.height);
|
||||
GAPI_DbgAssert(lpi <= 4);
|
||||
|
||||
linearScratchDesc<T, Mapper, numChan> scr(inSz.width, inSz.height, outSz.width, outSz.height, scratch.OutLineB());
|
||||
|
||||
const auto *alpha = scr.alpha;
|
||||
const auto *clone = scr.clone;
|
||||
const auto *mapsx = scr.mapsx;
|
||||
const auto *beta0 = scr.beta;
|
||||
const auto *mapsy = scr.mapsy;
|
||||
auto *tmp = scr.tmp;
|
||||
|
||||
const auto *beta = beta0 + outY;
|
||||
const T *src0[4];
|
||||
const T *src1[4];
|
||||
std::array<std::array<T*, 4>, numChan> dst;
|
||||
|
||||
for (int l = 0; l < lpi; l++) {
|
||||
auto index0 = mapsy[outY + l] - inY;
|
||||
auto index1 = mapsy[outSz.height + outY + l] - inY;
|
||||
src0[l] = in.InLine<const T>(index0);
|
||||
src1[l] = in.InLine<const T>(index1);
|
||||
for (int c=0; c < numChan; c++) {
|
||||
dst[c][l] = out[c].get().template OutLine<T>(l);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (with_cpu_x86_avx512_core()) {
|
||||
if (std::is_same<T, uint8_t>::value) {
|
||||
if (inSz.width >= 64 && outSz.width >= 32) {
|
||||
avx512::calcRowLinear_8UC<numChan>(dst,
|
||||
reinterpret_cast<const uint8_t**>(src0),
|
||||
reinterpret_cast<const uint8_t**>(src1),
|
||||
reinterpret_cast<const short*>(alpha),
|
||||
reinterpret_cast<const short*>(clone),
|
||||
reinterpret_cast<const short*>(mapsx),
|
||||
reinterpret_cast<const short*>(beta),
|
||||
reinterpret_cast<uint8_t*>(tmp),
|
||||
inSz, outSz, lpi);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void)tmp;
|
||||
(void)clone;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_AVX2
|
||||
if (with_cpu_x86_avx2()) {
|
||||
if (std::is_same<T, uint8_t>::value) {
|
||||
if (inSz.width >= 32 && outSz.width >= 16) {
|
||||
avx::calcRowLinear_8UC<numChan>(dst,
|
||||
reinterpret_cast<const uint8_t**>(src0),
|
||||
reinterpret_cast<const uint8_t**>(src1),
|
||||
reinterpret_cast<const short*>(alpha),
|
||||
reinterpret_cast<const short*>(clone),
|
||||
reinterpret_cast<const short*>(mapsx),
|
||||
reinterpret_cast<const short*>(beta),
|
||||
reinterpret_cast<uint8_t*>(tmp),
|
||||
inSz, outSz, lpi);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
if (with_cpu_x86_sse42()) {
|
||||
if (std::is_same<T, uint8_t>::value) {
|
||||
if (inSz.width >= 16 && outSz.width >= 8) {
|
||||
calcRowLinear_8UC<numChan>(dst,
|
||||
reinterpret_cast<const uint8_t**>(src0),
|
||||
reinterpret_cast<const uint8_t**>(src1),
|
||||
reinterpret_cast<const short*>(alpha),
|
||||
reinterpret_cast<const short*>(clone),
|
||||
reinterpret_cast<const short*>(mapsx),
|
||||
reinterpret_cast<const short*>(beta),
|
||||
reinterpret_cast<uint8_t*>(tmp),
|
||||
inSz, outSz, lpi);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
if (std::is_same<T, uint8_t>::value) {
|
||||
if (inSz.width >= 16 && outSz.width >= 8) {
|
||||
neon::calcRowLinear_8UC<numChan>(dst,
|
||||
reinterpret_cast<const uint8_t**>(src0),
|
||||
reinterpret_cast<const uint8_t**>(src1),
|
||||
reinterpret_cast<const short*>(alpha),
|
||||
reinterpret_cast<const short*>(clone),
|
||||
reinterpret_cast<const short*>(mapsx),
|
||||
reinterpret_cast<const short*>(beta),
|
||||
reinterpret_cast<uint8_t*>(tmp),
|
||||
inSz, outSz, lpi);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif // HAVE_NEON
|
||||
|
||||
auto length = out[0].get().length();
|
||||
|
||||
for (int l = 0; l < lpi; l++) {
|
||||
constexpr static const auto unity = Mapper::unity;
|
||||
|
||||
auto beta0 = beta[l];
|
||||
auto beta1 = saturate_cast<alpha_type>(unity - beta[l]);
|
||||
|
||||
for (int x = 0; x < length; x++) {
|
||||
auto alpha0 = alpha[x];
|
||||
auto alpha1 = saturate_cast<alpha_type>(unity - alpha[x]);
|
||||
auto sx0 = mapsx[x];
|
||||
auto sx1 = sx0 + 1;
|
||||
|
||||
for (int c = 0; c < numChan; c++) {
|
||||
auto idx0 = numChan*sx0 + c;
|
||||
auto idx1 = numChan*sx1 + c;
|
||||
T tmp0 = calc(beta0, src0[l][idx0], beta1, src1[l][idx0]);
|
||||
T tmp1 = calc(beta0, src0[l][idx1], beta1, src1[l][idx1]);
|
||||
dst[c][l][x] = calc(alpha0, tmp0, alpha1, tmp1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
namespace areaUpscale {
|
||||
struct Mapper {
|
||||
@ -2138,57 +1979,6 @@ GAPI_FLUID_KERNEL(FScalePlane8u, ScalePlane8u, true) {
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlanes, ScalePlanes, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
static const auto Kind = cv::GFluidKernel::Kind::Resize;
|
||||
|
||||
static void initScratch(const cv::GMatDesc& in, int, Size,
|
||||
Size outSz, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer &scratch) {
|
||||
initScratchLinear<uchar, linear::Mapper, 3>(in, outSz, scratch, LPI);
|
||||
}
|
||||
|
||||
static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
|
||||
}
|
||||
|
||||
static void run(const cv::gapi::fluid::View& in, int, Size, Size/*sz*/, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer& out1,
|
||||
cv::gapi::fluid::Buffer& out2,
|
||||
cv::gapi::fluid::Buffer& out3,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
constexpr int numChan = 3;
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, numChan> out = {out1, out2, out3};
|
||||
calcRowLinearC<uint8_t, linear::Mapper, numChan>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlanes4, ScalePlanes4, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
static const auto Kind = cv::GFluidKernel::Kind::Resize;
|
||||
|
||||
static void initScratch(const cv::GMatDesc& in, int, Size,
|
||||
Size outSz, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer &scratch) {
|
||||
initScratchLinear<uchar, linear::Mapper, 4>(in, outSz, scratch, LPI);
|
||||
}
|
||||
|
||||
static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
|
||||
}
|
||||
|
||||
static void run(const cv::gapi::fluid::View& in, int, Size, Size/*sz*/, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer& out1,
|
||||
cv::gapi::fluid::Buffer& out2,
|
||||
cv::gapi::fluid::Buffer& out3,
|
||||
cv::gapi::fluid::Buffer& out4,
|
||||
cv::gapi::fluid::Buffer& scratch) {
|
||||
constexpr int numChan = 4;
|
||||
std::array<std::reference_wrapper<cv::gapi::fluid::Buffer>, numChan> out = {out1, out2, out3, out4};
|
||||
calcRowLinearC<uint8_t, linear::Mapper, numChan>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(FUpscalePlaneArea8u, UpscalePlaneArea8u, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
@ -2228,29 +2018,6 @@ GAPI_FLUID_KERNEL(FUpscalePlaneArea32f, UpscalePlaneArea32f, true) {
|
||||
calcRowLinear<float, areaUpscale32f::Mapper>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlane32f, ScalePlane32f, true) {
|
||||
static const int Window = 1;
|
||||
static const int LPI = 4;
|
||||
static const auto Kind = cv::GFluidKernel::Kind::Resize;
|
||||
|
||||
static void initScratch(const cv::GMatDesc& in,
|
||||
Size outSz, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer &scratch) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
|
||||
initScratchLinear<float, linear32f::Mapper>(in, outSz, scratch, 0);
|
||||
}
|
||||
|
||||
static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {
|
||||
}
|
||||
|
||||
static void run(const cv::gapi::fluid::View& in, Size /*sz*/, int /*interp*/,
|
||||
cv::gapi::fluid::Buffer& out, cv::gapi::fluid::Buffer &scratch) {
|
||||
calcRowLinear<float, linear32f::Mapper>(in, out, scratch);
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
GAPI_FLUID_KERNEL(FScalePlaneArea32f, ScalePlaneArea32f, true) {
|
||||
@ -2435,9 +2202,7 @@ cv::gapi::GKernelPackage preprocKernels() {
|
||||
return combine(
|
||||
FKernelsChooseISA(),
|
||||
cv::gapi::kernels
|
||||
<FScalePlanes
|
||||
, FScalePlanes4
|
||||
, FScalePlane
|
||||
< FScalePlane
|
||||
, FUpscalePlaneArea8u
|
||||
, FUpscalePlaneArea32f
|
||||
, FScalePlaneArea8u
|
||||
|
@ -65,6 +65,52 @@ namespace gapi {
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlane32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(UpscalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(UpscalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.upscale_plane_area_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width < sz.width || in.size.height < sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlaneArea8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_8u") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_8U && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(ScalePlaneArea32f, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_area_32f") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc & in, const Size & sz, int) {
|
||||
GAPI_DbgAssert(in.depth == CV_32F && in.chan == 1);
|
||||
GAPI_DbgAssert(in.size.width >= sz.width && in.size.height >= sz.height);
|
||||
return in.withSize(sz);
|
||||
}
|
||||
};
|
||||
|
||||
G_TYPED_KERNEL(Merge2, <cv::GMat(cv::GMat, cv::GMat)>, "com.intel.ie.merge2") {
|
||||
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const cv::GMatDesc &) {
|
||||
// FIXME: check a/b are equal!
|
||||
@ -165,5 +211,153 @@ namespace gapi {
|
||||
};
|
||||
cv::gapi::GKernelPackage preprocKernels();
|
||||
|
||||
|
||||
namespace kernels {
|
||||
|
||||
struct fp_16_t {
|
||||
int16_t v;
|
||||
};
|
||||
|
||||
template<typename type>
|
||||
struct cv_type_to_depth;
|
||||
|
||||
template<> struct cv_type_to_depth<std::uint8_t> { enum { depth = CV_8U }; };
|
||||
template<> struct cv_type_to_depth<std::int8_t> { enum { depth = CV_8S }; };
|
||||
template<> struct cv_type_to_depth<std::uint16_t> { enum { depth = CV_16U }; };
|
||||
template<> struct cv_type_to_depth<std::int16_t> { enum { depth = CV_16S }; };
|
||||
template<> struct cv_type_to_depth<std::int32_t> { enum { depth = CV_32S }; };
|
||||
template<> struct cv_type_to_depth<float> { enum { depth = CV_32F }; };
|
||||
template<> struct cv_type_to_depth<fp_16_t> { enum { depth = CV_16F }; };
|
||||
|
||||
template<typename ... types>
|
||||
struct typelist {};
|
||||
|
||||
template<typename type_list>
|
||||
struct head;
|
||||
|
||||
template<template<typename ...> class list, typename head_t, typename ... types>
|
||||
struct head<list<head_t, types...>> { using type = head_t; };
|
||||
|
||||
template<typename typelist>
|
||||
using head_t = typename head<typelist>::type;
|
||||
|
||||
template<typename type>
|
||||
struct type_to_type {};
|
||||
|
||||
template <typename typelist>
|
||||
struct type_dispatch_impl;
|
||||
|
||||
//FIXME: add test for type_dispatch
|
||||
template <template<typename ...> class typelist, typename... type>
|
||||
struct type_dispatch_impl<typelist<type...>> {
|
||||
template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
|
||||
static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
|
||||
result_t res = default_value;
|
||||
|
||||
bool matched = false;
|
||||
std::initializer_list<int>({
|
||||
!matched && (type_id == type_to_id(type_to_type<type>{})) ?
|
||||
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||
: 0
|
||||
...
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename result_t, typename default_t, typename pred_t, typename type_to_value_t>
|
||||
static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) {
|
||||
result_t res = default_value;
|
||||
|
||||
bool matched = false;
|
||||
std::initializer_list<int>({
|
||||
!matched && pred(type_to_type<type>{}) ?
|
||||
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||
: 0
|
||||
...
|
||||
});
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename left_typelsist, typename right_typelsist>
|
||||
struct concat;
|
||||
|
||||
template<typename left_typelsist, typename right_typelsist>
|
||||
using concat_t = typename concat<left_typelsist, right_typelsist>::type;
|
||||
|
||||
template<template<typename ...> class left_list, typename ... left_types, template<typename ...> class right_list, typename ... right_types>
|
||||
struct concat<left_list<left_types...>, right_list<right_types...>> {
|
||||
using type = left_list<left_types..., right_types...>;
|
||||
};
|
||||
|
||||
template< class T, class U >
|
||||
using is_same_t = typename std::is_same<T, U>::type;
|
||||
|
||||
template<bool C, class T, class E> struct if_c_impl;
|
||||
|
||||
template<class T, class E> struct if_c_impl<true, T, E> {
|
||||
using type = T;
|
||||
};
|
||||
|
||||
template<class T, class E> struct if_c_impl<false, T, E> {
|
||||
using type = E;
|
||||
};
|
||||
|
||||
template<bool C, class T, class E>
|
||||
using if_c = typename if_c_impl<C, T, E>::type;
|
||||
|
||||
template<class C, class T, class E>
|
||||
using if_ = typename if_c_impl<C::value != 0, T, E>::type;
|
||||
|
||||
template<typename typelist, typename type>
|
||||
struct remove;
|
||||
|
||||
template<typename typelist, typename type>
|
||||
using remove_t = typename remove<typelist, type>::type;
|
||||
|
||||
|
||||
template<template<typename ...> class list, typename head_t, typename ... types, typename t>
|
||||
struct remove<list<head_t, types...>, t> {
|
||||
using type = concat_t<
|
||||
if_<is_same_t<head_t, t>, list<>, list<head_t>>,
|
||||
remove_t<list<types...>, t>
|
||||
>;
|
||||
};
|
||||
|
||||
template<template<typename ...> class list, typename t>
|
||||
struct remove<list<>, t> {
|
||||
using type = list<>;
|
||||
};
|
||||
|
||||
template <typename typelist, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t,
|
||||
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {})) >
|
||||
inline result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value = {}) {
|
||||
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<type_id_t>(type_id),
|
||||
std::forward<type_to_id_t>(type_to_id),
|
||||
std::forward<type_to_value_t>(type_to_value),
|
||||
std::forward<default_t>(default_value));
|
||||
}
|
||||
|
||||
template <typename typelist, typename default_t, typename pred_t, typename type_to_value_t,
|
||||
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {})) >
|
||||
inline result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) {
|
||||
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<pred_t>(pred),
|
||||
std::forward<type_to_value_t>(type_to_value),
|
||||
std::forward<default_t>(default_value));
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct cv_type_id {
|
||||
template <typename type>
|
||||
const int operator()(type_to_type<type>) { return cv_type_to_depth<type>::depth; }
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename typelist>
|
||||
bool is_cv_type_in_list(const int type_id) {
|
||||
return type_dispatch<typelist>(type_id, cv_type_id{}, [](...) { return true; }, false);
|
||||
}
|
||||
} // namespace kernels
|
||||
} // namespace gapi
|
||||
} // namespace InferenceEngine
|
||||
|
Loading…
Reference in New Issue
Block a user