From 252afa3b6cd1a4b93c53c5a13709921970425c6d Mon Sep 17 00:00:00 2001 From: River Li Date: Thu, 7 Sep 2023 13:09:16 +0800 Subject: [PATCH] [CPU] Fix incorrect output for float to bf16 in avx2 isa (#19358) --- .../src/emitters/x64/jit_bf16_emitters.hpp | 1 + .../intel_cpu/tests/unit/jit_kernel_test.cpp | 35 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_bf16_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_bf16_emitters.hpp index 88a8d0772e2..1a1e4c1d055 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_bf16_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_bf16_emitters.hpp @@ -75,6 +75,7 @@ private: h->uni_vpackusdw(aux, aux, aux); if (host_isa_ == dnnl::impl::cpu::x64::cpu_isa_t::avx2) { + h->vpermq(Ymm(aux.getIdx()), Ymm(aux.getIdx()), 0xD8); //11 01 10 00 h->vextracti128(out, Ymm(aux.getIdx()), 0); } else { h->uni_vmovups(out, aux); diff --git a/src/plugins/intel_cpu/tests/unit/jit_kernel_test.cpp b/src/plugins/intel_cpu/tests/unit/jit_kernel_test.cpp index 53576c629bc..a851e0b4c59 100644 --- a/src/plugins/intel_cpu/tests/unit/jit_kernel_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/jit_kernel_test.cpp @@ -272,20 +272,19 @@ struct jit_variable_load_store_test_kernel { size_t size; }; - template + template void test() { kernel_impl kernel; kernel.init(); - - const size_t size = 3; + ASSERT_GE(N, M); std::array src {}; std::array result {}; - Params args = { src.data(), result.data(), size }; + Params args = { src.data(), result.data(), M }; src.fill(static_cast(42)); - for (size_t i = 0; i < size; ++i) { + for (size_t i = 0; i < M; ++i) { src[i] = static_cast(i); } @@ -293,7 +292,7 @@ struct jit_variable_load_store_test_kernel { std::array expected_result {}; - for (size_t i = 0; i < size; ++i) { + for (size_t i = 0; i < M; ++i) { expected_result[i] = static_cast(i); } @@ -325,52 +324,52 @@ TEST(JitKernel, variable_load_and_store) { { jit_variable_load_store_test_kernel kernel; if (mayiuse(cpu_isa_t::avx512_core)) { - kernel.test<16, false>(); + kernel.test<16, 11, false>(); } if (mayiuse(cpu_isa_t::avx2)) { - kernel.test<8, false>(); + kernel.test<8, 5, false>(); } if (mayiuse(cpu_isa_t::sse41)) { - kernel.test<4, false>(); + kernel.test<4, 3, false>(); } } { jit_variable_load_store_test_kernel kernel; if (mayiuse(cpu_isa_t::avx512_core)) { - kernel.test<16, false>(); + kernel.test<16, 11, false>(); } if (mayiuse(cpu_isa_t::avx2)) { - kernel.test<8, false>(); + kernel.test<8, 5, false>(); } if (mayiuse(cpu_isa_t::sse41)) { - kernel.test<4, false>(); + kernel.test<4, 3, false>(); } } { jit_variable_load_store_test_kernel kernel; if (mayiuse(cpu_isa_t::avx512_core)) { - kernel.test<16, true>(); + kernel.test<16, 11, true>(); } if (mayiuse(cpu_isa_t::avx2)) { - kernel.test<8, true>(); + kernel.test<8, 5, true>(); } if (mayiuse(cpu_isa_t::sse41)) { - kernel.test<4, true>(); + kernel.test<4, 3, true>(); } } { jit_variable_load_store_test_kernel kernel; if (mayiuse(cpu_isa_t::avx512_core)) { - kernel.test<16, true>(); + kernel.test<16, 11, true>(); } if (mayiuse(cpu_isa_t::avx2)) { - kernel.test<8, true>(); + kernel.test<8, 5, true>(); } if (mayiuse(cpu_isa_t::sse41)) { - kernel.test<4, true>(); + kernel.test<4, 3, true>(); } } }