diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 8ae0ddc46b2..7ce356fce7b 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -92,19 +92,20 @@ protected: pop(rsi); } - void check_subnormals(const Xbyak::Reg64& src, const Xbyak::Ymm &mask, const Xbyak::Ymm &zero) { + void check_subnormals(const Xbyak::Reg64& src, const Xbyak::Ymm &exponent_mask, const Xbyak::Ymm &mantissa_mask, const Xbyak::Ymm &zero) { auto a = ymm1; auto b = ymm2; auto c = ymm3; vmovdqu(a, yword[src]); // load 8 floats - vpcmpeqd(b, a, zero); // if (a == 0) b = 1 else b = 0 - vpand(c, a, mask); // c = a & 01111111100000000000000000000000 + vpand(b, a, mantissa_mask); // b = a & 00000000011111111111111111111111 + vpcmpeqd(b, b, zero); // if (b == 0) b = 1 else b = 0 + vpand(c, a, exponent_mask); // c = a & 01111111100000000000000000000000 vpcmpeqd(c, c, zero); // if (c == 0) c = 1 else c = 0 vptest(b, c); // if ((!b & c) == 0) CF = 1 else CF = 0 } - void check_subnormals(const Xbyak::Reg64& src, const Xbyak::Xmm &mask, const Xbyak::Xmm &zero) { + void check_subnormals(const Xbyak::Reg64& src, const Xbyak::Xmm &exponent_mask, const Xbyak::Xmm &mantissa_mask, const Xbyak::Xmm &zero) { auto a = xmm1; auto b = xmm2; auto c = xmm3; @@ -112,10 +113,11 @@ protected: uni_vmovdqu(a, xword[src]); // load 4 floats uni_vmovdqu(b, a); // b = a uni_vmovdqu(c, a); // c = a - uni_vpcmpeqd(b, b, zero); // if (a == 0) b = 1 else b = 0 - uni_vpand(c, c, mask); // c = a & 01111111100000000000000000000000 - uni_vpcmpeqd(c, c, zero); // if (c == 0) c = 1 else c = 0 - uni_vtestps(b, c); // if ((!b & c) == 0) CF = 1 else CF = 0 + uni_vpand(b, b, mantissa_mask); // b = a & 00000000011111111111111111111111 + uni_vpcmpeqd(b, b, zero); // if (b == 0) b = 1 else b = 0 + uni_vpand(c, c, exponent_mask); // c = a & 01111111100000000000000000000000 + uni_vpcmpeqd(c, c, zero); // if (c == 0) c = 1 else c = 0 + uni_vtestps(b, c); // if ((!b & c) == 0) CF = 1 else CF = 0 } template @@ -130,12 +132,18 @@ protected: const Reg64 ®_idx = rsi; const Reg64 ®_mask_addr = r15; - static const uint32_t mask_data[8]; + static const uint32_t exponent_mask_data[8]; + static const uint32_t mantissa_mask_data[8]; }; -const uint32_t jit_has_subnormals_base::mask_data[8] = { - 0xFF << 23, 0xFF << 23, 0xFF << 23, 0xFF << 23, - 0xFF << 23, 0xFF << 23, 0xFF << 23, 0xFF << 23 +const uint32_t jit_has_subnormals_base::exponent_mask_data[8] = { + 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, + 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 +}; + +const uint32_t jit_has_subnormals_base::mantissa_mask_data[8] = { + 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, + 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff }; template<> @@ -143,6 +151,7 @@ struct jit_has_subnormals_base::reg { constexpr static uint32_t length = 8; constexpr static const Xbyak::Ymm & rmm4 = Xbyak::util::ymm4; constexpr static const Xbyak::Ymm & rmm5 = Xbyak::util::ymm5; + constexpr static const Xbyak::Ymm & rmm6 = Xbyak::util::ymm6; }; template<> @@ -150,6 +159,7 @@ struct jit_has_subnormals_base::reg { constexpr static uint32_t length = 4; constexpr static const Xbyak::Xmm & rmm4 = Xbyak::util::xmm4; constexpr static const Xbyak::Xmm & rmm5 = Xbyak::util::xmm5; + constexpr static const Xbyak::Xmm & rmm6 = Xbyak::util::xmm6; }; template @@ -159,7 +169,8 @@ struct jit_has_subnormals : public jit_has_subnormals_base { const int sh_bits = std::ilogb(vlen); auto zero = reg::rmm4; - auto mask = reg::rmm5; + auto exponent_mask = reg::rmm5; + auto mantissa_mask = reg::rmm6; preamble(); @@ -167,11 +178,13 @@ struct jit_has_subnormals : public jit_has_subnormals_base { mov(reg_src, ptr[param1 + offsetof(args_t, src)]); lea(reg_dst, ptr[param1 + offsetof(args_t, hasSubnormals)]); mov(reg_sz, ptr[param1 + offsetof(args_t, count)]); - mov(reg_mask_addr, (size_t)mask_data); // Initialize necessary consts uni_vpxor(zero, zero, zero); - uni_vmovdqu(mask, ptr[reg_mask_addr]); + mov(reg_mask_addr, (size_t)exponent_mask_data); + uni_vmovdqu(exponent_mask, ptr[reg_mask_addr]); + mov(reg_mask_addr, (size_t)mantissa_mask_data); + uni_vmovdqu(mantissa_mask, ptr[reg_mask_addr]); // Main loop xor_(reg_idx, reg_idx); @@ -179,7 +192,7 @@ struct jit_has_subnormals : public jit_has_subnormals_base { shr(r8, sh_bits); foreach(reg_idx, 1, r8, [&, this](const Xbyak::Reg64& idx) { - check_subnormals(reg_src, mask, zero); + check_subnormals(reg_src, exponent_mask, mantissa_mask, zero); jnc(has_subnormals); add(reg_src, sizeof(float) * vlen); }); @@ -197,7 +210,7 @@ struct jit_has_subnormals : public jit_has_subnormals_base { uni_vmovdqu(ptr[r8], zero); copy_floats(r8, reg_src, reg_sz); - check_subnormals(r8, mask, zero); + check_subnormals(r8, exponent_mask, mantissa_mask, zero); jc(no_subnormals); add(rsp, vlen * sizeof(float)); @@ -331,8 +344,10 @@ void Input::cloneBlobIfRequired() { } #endif + uint32_t mantissaMask = 0x007fffff; + uint32_t exponentMask = 0x7f800000; for (size_t i = 0; i < size; ++i) { - if (u32data[i] && (u32data[i] & (0xFF << 23)) == 0) { + if ((u32data[i] & exponentMask) == 0 && (u32data[i] & mantissaMask) != 0) { return true; } } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/denormal_check.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/denormal_check.cpp index 51857e8d038..4777d7c3475 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/denormal_check.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/denormal_check.cpp @@ -43,10 +43,11 @@ void validate() override { ASSERT_FALSE(actualOutputs.empty()); auto& outTensor = actualOutputs.front(); ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type"; - const uint32_t* data = reinterpret_cast(outTensor.data()); + const float* data = reinterpret_cast(outTensor.data()); bool hasDenormals = false; for (size_t i = 0; i < outTensor.get_size(); ++i) { - if (data[i] && (data[i] & (0xff << 23)) == 0) { + if (std::abs(data[i]) >= std::numeric_limits::denorm_min() && + std::abs(data[i]) < std::numeric_limits::min()) { hasDenormals = true; } } @@ -108,4 +109,5 @@ TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) { run(); } } + }// namespace SubgraphTestsDefinitions