[CPU] Fix issue about gather int32 in Reduce node (#13756)

This commit is contained in:
Chen Xu
2022-11-03 13:47:48 +08:00
committed by GitHub
parent b7dfc31597
commit a11189da66
2 changed files with 44 additions and 2 deletions

View File

@@ -549,10 +549,20 @@ private:
case memory::data_type::s32:
if (isa == cpu::x64::avx512_core) {
kxnord(k_mask, k_mask, k_mask);
vgatherdps(vmm_src | k_mask, ptr[reg_src + offset + vmm_idx]);
if (jcp_.src_dt == memory::data_type::f32) {
vgatherdps(vmm_src | k_mask, ptr[reg_src + offset + vmm_idx]);
} else {
vpgatherdd(vmm_src | k_mask, ptr[reg_src + offset + vmm_idx]);
uni_vcvtdq2ps(vmm_src, vmm_src);
}
} else if (isa == cpu::x64::avx2) {
uni_vpcmpeqd(vmm_mask, vmm_mask, vmm_mask);
vgatherdps(vmm_src, ptr[reg_src + offset + vmm_idx], vmm_mask);
if (jcp_.src_dt == memory::data_type::f32) {
vgatherdps(vmm_src, ptr[reg_src + offset + vmm_idx], vmm_mask);
} else {
vpgatherdd(vmm_src, ptr[reg_src + offset + vmm_idx], vmm_mask);
uni_vcvtdq2ps(vmm_src, vmm_src);
}
} else {
pack_gathered_vector(vmm_src, vmm_idx, offset, jcp_.src_dt);
}

View File

@@ -258,6 +258,13 @@ const std::vector<ngraph::helpers::ReductionType> reductionTypes = {
ngraph::helpers::ReductionType::L2,
};
// Reduction modes swept by the i32 parameter set (params_Int32).
const std::vector<ngraph::helpers::ReductionType> reductionTypesInt32{
        ngraph::helpers::ReductionType::Sum,
        ngraph::helpers::ReductionType::Min,
        ngraph::helpers::ReductionType::Max,
        ngraph::helpers::ReductionType::L1,
};
const std::vector<ngraph::helpers::ReductionType> reductionTypesFusing = {
ngraph::helpers::ReductionType::Mean,
ngraph::helpers::ReductionType::Max,
@@ -284,6 +291,11 @@ std::vector<std::vector<ov::test::InputShape>> inputShapes_6D = {
{{{{1, 5}, 19, {1, 5}, {1, 5}, {1, 5}, {1, 5}}, {{2, 19, 2, 2, 2, 2}, {2, 19, 2, 2, 3, 2}}}},
};
// Input shape sets consumed by the i32 parameter set (params_Int32).
// NOTE(review): each entry looks like {bounds, {concrete shapes...}}; an empty
// bounds list presumably denotes a static shape -- confirm against ov::test::InputShape.
std::vector<std::vector<ov::test::InputShape>> inputShapes_Int32{
        // No bounds given; a single concrete 4D shape.
        {{{}, {{2, 19, 2, 3}}}},
        // Ranged dimensions around a fixed 19, exercised with two concrete 4D shapes.
        {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}},
};
std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}),
CPUSpecificParams({nchw}, {nchw}, {}, {}),
@@ -408,6 +420,19 @@ const auto params_MultiAxis_6D = testing::Combine(
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
// Parameter generator for the i32 Reduce test suite.
// The inner Combine yields the cross product of axes, keepDims, reductionTypesInt32 and
// inputShapes_Int32, with the op type fixed to VECTOR and the element type fixed to i32.
// The outer Combine pairs each of those tuples with emptyCPUSpec and emptyFusingSpec.
const auto params_Int32 = testing::Combine(
testing::Combine(
testing::ValuesIn(axes),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::ValuesIn(keepDims),
testing::ValuesIn(reductionTypesInt32),
testing::Values(ElementType::i32),
// NOTE(review): the two `undefined` element types below are presumably the
// input/output precision override slots -- confirm against the test's parameter tuple.
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Int32)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_CPU,
ReduceCPULayerTest,
@@ -450,6 +475,13 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);
// Instantiates ReduceCPULayerTest under the name smoke_Reduce_Int32_CPU, once per tuple
// produced by params_Int32; getTestCaseName supplies the per-case name.
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_Int32_CPU,
ReduceCPULayerTest,
params_Int32,
ReduceCPULayerTest::getTestCaseName
);
/* ================================ 1.2 No fusion - Logical ================================ */
const auto params_OneAxis_Logical = testing::Combine(
testing::Combine(