[CPU] Reduce node supports fp16 precision (#18227)

Chen Xu 2023-07-18 20:49:25 +08:00 committed by GitHub
parent 8a49cf14ee
commit 9334ad0790
10 changed files with 267 additions and 122 deletions

View File

@ -121,6 +121,13 @@ using ov::with_cpu_x86_avx512_core_vnni;
*/
using ov::with_cpu_x86_bfloat16;
/**
* @brief Checks whether CPU supports fp16 capability
* @ingroup ie_dev_api_system_conf
 * @return `True` if AVX512_FP16 instructions are available, `false` otherwise
*/
using ov::with_cpu_x86_avx512_core_fp16;
/**
* @brief Checks whether CPU supports AMX int8 capability
* @ingroup ie_dev_api_system_conf

View File

@ -110,6 +110,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_vnni();
*/
OPENVINO_RUNTIME_API bool with_cpu_x86_bfloat16();
/**
* @brief Checks whether CPU supports fp16 capability
* @ingroup ov_dev_api_system_conf
 * @return `True` if AVX512_FP16 instructions are available, `false` otherwise
*/
OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_fp16();
/**
* @brief Checks whether CPU supports AMX int8 capability
* @ingroup ov_dev_api_system_conf

View File

@ -72,6 +72,10 @@ bool with_cpu_x86_bfloat16() {
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);
}
bool with_cpu_x86_avx512_core_fp16() {
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);
}
bool with_cpu_x86_avx512_core_amx_int8() {
return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);
}
@ -107,6 +111,9 @@ bool with_cpu_x86_avx512_core_vnni() {
bool with_cpu_x86_bfloat16() {
return false;
}
bool with_cpu_x86_avx512_core_fp16() {
return false;
}
bool with_cpu_x86_avx512_core_amx_int8() {
return false;
}
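
Note: the new capability check mirrors the existing bfloat16 probe, querying Xbyak's cached CPUID info for the AVX512_FP16 feature bit (with a hard `false` on non-x86 builds). A minimal sketch of gating an fp16 code path on it — the dispatch function and include path are assumptions for illustration, not part of this commit:

#include "openvino/runtime/system_conf.hpp"  // assumed dev-API header location

void dispatch_reduce_kernel() {
    if (ov::with_cpu_x86_avx512_core_fp16()) {
        // AVX512_FP16 is available: the native fp16 jit path can be selected
    } else {
        // fall back to the fp32 kernels (or upconvert fp16 inputs)
    }
}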

View File

@ -311,7 +311,8 @@ void Graph::Replicate(const CNNNetwork &network) {
const auto childEdges = input.second->getChildEdgesAtPort(0);
for (size_t i = 0; i < childEdges.size(); i++) {
const auto child = childEdges[i]->getChild();
if (child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()) != Precision::BF16 &&
if (!one_of(child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()),
Precision::BF16, Precision::FP16) &&
// remove this WA when #78939 is resolved
!hasSubgraphConsumers(child))
child->setOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum(), precToSet);
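
Note: `one_of` here is the intel_cpu plugin's variadic membership helper, which lets the condition grow from "not BF16" to "neither BF16 nor FP16" without chaining comparisons. A minimal sketch of the pattern it implements, assuming C++17 fold expressions:

template <typename T, typename... Args>
inline bool one_of(T value, Args... candidates) {
    // true when value compares equal to any candidate
    return ((value == candidates) || ...);
}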

View File

@ -108,7 +108,7 @@ bool ReduceKey::operator==(const ReduceKey &rhs) const {
// some utility functions
static inline bool isFloatCompatible(memory::data_type type) {
return memory::data_type::f32 == type || memory::data_type::bf16 == type;
return memory::data_type::f32 == type || memory::data_type::bf16 == type || memory::data_type::f16 == type;
}
template <cpu_isa_t isa>
@ -590,6 +590,7 @@ private:
}
break;
case memory::data_type::bf16:
case memory::data_type::f16:
case memory::data_type::s8:
case memory::data_type::u8:
pack_gathered_vector(vmm_src, vmm_idx, offset, jcp_.src_dt);
@ -614,8 +615,9 @@ private:
mov(ptr[rsp + i * sizeof(int)], reg_tmp_64.cvt32());
break;
case memory::data_type::bf16:
case memory::data_type::f16:
mov(reg_tmp_64.cvt16(), table_idx);
mov(ptr[rsp + i * sizeof(ov::intel_cpu::bfloat16_t)], reg_tmp_64.cvt16());
mov(ptr[rsp + i * 2], reg_tmp_64.cvt16());
break;
case memory::data_type::s8:
case memory::data_type::u8:
@ -635,7 +637,10 @@ private:
case memory::data_type::bf16:
uni_vpmovzxwd(vmm_val, ptr[rsp]);
uni_vpslld(vmm_val, vmm_val, 16);
break;
case memory::data_type::f16:
vcvtph2ps(vmm_val, ptr[rsp]);
break;
case memory::data_type::s8:
uni_vpmovsxbd(vmm_val, ptr[rsp]);
break;
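
Note: the two half-width load paths above differ for a reason. bf16 is the upper half of an IEEE-754 binary32, so widening it is a pure 16-bit shift, while fp16 has a 5-bit/10-bit exponent/mantissa split and needs a real format conversion (vcvtph2ps). A scalar sketch of the distinction — illustration only; ov::float16 and its header are pre-existing OpenVINO core types, not added here:

#include <cstdint>
#include <cstring>
#include "openvino/core/type/float16.hpp"  // assumed header for ov::float16

// bf16 -> f32: append 16 zero bits (the jit code's uni_vpslld by 16)
float bf16_to_f32(uint16_t bits) {
    uint32_t u = static_cast<uint32_t>(bits) << 16;
    float f;
    std::memcpy(&f, &u, sizeof(f));
    return f;
}

// fp16 -> f32: a genuine conversion, done by vcvtph2ps in the jit kernel
float f16_to_f32(ov::float16 h) {
    return static_cast<float>(h);
}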
@ -890,6 +895,9 @@ private:
uni_vpmovzxwd(vmm_src, op);
uni_vpslld(vmm_src, vmm_src, 16);
break;
case memory::data_type::f16:
vcvtph2ps(vmm_src, op);
break;
case memory::data_type::s8:
uni_vpmovsxbd(vmm_src, op);
break;
@ -914,6 +922,9 @@ private:
uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
uni_vpslld(xmm_src, xmm_src, 16);
break;
case memory::data_type::f16:
vcvtph2ps(xmm_src, op);
break;
case memory::data_type::s8:
movsx(reg_tmp_32, op);
uni_vmovq(xmm_src, reg_tmp_64);
@ -948,6 +959,9 @@ private:
uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
vmovdqu16(op, ymm_dst);
break;
case memory::data_type::f16:
vcvtps2ph(op, vmm_dst, 0x4);
break;
case memory::data_type::s8:
if (isa == cpu::x64::avx512_core) {
vpmovsdb(op, vmm_dst);
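
Note on the immediate 0x4 passed to vcvtps2ph in the new f16 store: bit 2 of that immediate tells the instruction to take the rounding mode from MXCSR (round-to-nearest-even by default) rather than from the immediate's low bits. A scalar sketch of the same fp32-to-fp16 store using the F16C intrinsic (illustration, not code from the commit):

#include <cstdint>
#include <immintrin.h>

uint16_t f32_to_f16_bits(float f) {
    // _MM_FROUND_CUR_DIRECTION == 0x04: honor MXCSR, as the jit store does
    return static_cast<uint16_t>(_cvtss_sh(f, _MM_FROUND_CUR_DIRECTION));
}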
@ -996,6 +1010,9 @@ private:
uni_vpsrld(xmm_dst, xmm_dst, 16);
uni_vpextrw(op, xmm_dst, 0x0);
break;
case memory::data_type::f16:
vcvtps2ph(op, xmm_dst, 0x4);
break;
case memory::data_type::s8:
uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
@ -1540,6 +1557,9 @@ private:
uni_vpmovzxwd(vmm_src, op);
uni_vpslld(vmm_src, vmm_src, 16);
break;
case memory::data_type::f16:
vcvtph2ps(vmm_src, op);
break;
case memory::data_type::s8:
uni_vpmovsxbd(vmm_src, op);
break;
@ -1564,6 +1584,9 @@ private:
uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
uni_vpslld(xmm_src, xmm_src, 16);
break;
case memory::data_type::f16:
vcvtph2ps(xmm_src, op);
break;
case memory::data_type::s8:
movsx(reg_tmp_32, op);
uni_vmovq(xmm_src, reg_tmp_64);
@ -1598,6 +1621,9 @@ private:
uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
vmovdqu16(op, ymm_dst);
break;
case memory::data_type::f16:
vcvtps2ph(op, vmm_dst, 0x4);
break;
case memory::data_type::s8:
if (isa == cpu::x64::avx512_core) {
vpmovsdb(op, vmm_dst);
@ -1646,6 +1672,9 @@ private:
uni_vpsrld(xmm_dst, xmm_dst, 16);
uni_vpextrw(op, xmm_dst, 0x0);
break;
case memory::data_type::f16:
vcvtps2ph(op, xmm_dst, 0x4);
break;
case memory::data_type::s8:
uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
@ -1878,16 +1907,20 @@ void Reduce::initSupportedPrimitiveDescriptors() {
jit_mode = canApplyJIT(input_prec, output_prec);
auto is_precision_sensitive_reduce = [](const Algorithm &algorithm) {
return algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr &&
algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax;
};
if (jit_mode) {
// Since in jit mode we use the output memory as an intermediate accumulator for certain reduce modes, we can't use BF16 output precision due to
// Since in jit mode we use the output memory as an intermediate accumulator for certain reduce modes, we can't use BF16/FP16 output precision due to
// the possible accuracy loss. Therefore, for such modes, we will change the output precision to FP32.
if (Precision::BF16 == output_prec) {
if (!mayiuse(avx512_core)) {
output_prec = Precision::FP32;
} else if (algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr &&
algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax) {
output_prec = Precision::FP32;
}
if (!mayiuse(avx512_core) || is_precision_sensitive_reduce(algorithm))
output_prec = Precision::FP32;
} else if (Precision::FP16 == output_prec) {
if (!mayiuse(cpu::x64::avx2) || is_precision_sensitive_reduce(algorithm))
output_prec = Precision::FP32;
}
}
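
Note: the demotion above exists because, in jit mode, the output buffer doubles as the accumulator for modes like Sum, Prod and Mean, and half-precision accumulation loses small addends early. A worked illustration of the failure mode, assuming a compiler with _Float16 support (GCC/Clang on x86; not part of the commit):

#include <cstdio>

int main() {
    _Float16 acc = 0;
    for (int i = 0; i < 4096; ++i)
        acc += static_cast<_Float16>(1.0f);
    // fp16 spacing above 2048 is 2.0, so 2048 + 1 rounds back to 2048 and the
    // running sum saturates: this prints 2048, not 4096.
    std::printf("%.0f\n", static_cast<float>(acc));
}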
@ -2862,6 +2895,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
} else if (output_prec == Precision::BF16) {
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<bfloat16_t>(1); });
} else if (output_prec == Precision::FP16) {
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<ov::float16>(1); });
} else if (output_prec == Precision::U8) {
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<uint8_t>(1); });
@ -2880,6 +2916,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
} else if (output_prec == Precision::BF16) {
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<bfloat16_t>::lowest(); });
} else if (output_prec == Precision::FP16) {
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<ov::float16>::lowest(); });
} else if (output_prec == Precision::U8) {
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<uint8_t>::min(); });
@ -2898,6 +2937,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
} else if (output_prec == Precision::BF16) {
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<bfloat16_t>::max(); });
} else if (output_prec == Precision::FP16) {
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<ov::float16>::max(); });
} else if (output_prec == Precision::U8) {
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<uint8_t>::max(); });
@ -3268,6 +3310,7 @@ std::vector<int> Reduce::update_src_dims() {
bool Reduce::canApplyJIT(const Precision &input_prec, const Precision &output_prec) const {
static const Precision supportedPrecisions[] = {
Precision::FP32,
Precision::FP16,
Precision::BF16,
Precision::I32,
Precision::I8,

View File

@ -74,7 +74,7 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*OVCompiledModelBaseTest.*(CanGetInputsInfoAndCheck|canSetConfigToCompiledModel).*)",
R"(.*Behavior.*CorrectConfigCheck.*(canSetConfigAndCheckGetConfig|canSetConfigTwiceAndCheckGetConfig).*CPU_BIND_THREAD=YES.*)",
// Issue: 72021 Unreasonable abs_threshold for comparing bf16 results
R"(.*smoke_Reduce.*type=(Prod|Min).*netPRC=(BF|bf)16.*)",
R"(.*smoke_Reduce.*type=(Prod|Min).*INFERENCE_PRECISION_HINT=(BF|bf)16.*)",
// TODO: 56520 Accuracy mismatch
R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=(I64|I32).*)",
R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=U64.*)",
@ -246,6 +246,12 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*Snippets.*MHA.*)");
retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
}
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) {
// Skip fp16 tests on platforms that don't support fp16 precision
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
}
#endif
if (!InferenceEngine::with_cpu_x86_avx512_core_vnni() && !InferenceEngine::with_cpu_x86_avx512_core_amx_int8()) {
// MatMul in Snippets uses BRGEMM that supports i8 only on platforms with VNNI or AMX instructions
retVector.emplace_back(R"(.*Snippets.*MatMulFQ.*)");

View File

@ -18,7 +18,8 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
basicReduceParams basicParams;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::tie(basicParams, cpuParams, fusingParams) = obj.param;
std::map<std::string, ov::element::Type> additionalConfig;
std::tie(basicParams, cpuParams, fusingParams, additionalConfig) = obj.param;
std::vector<int> axes;
CommonTestUtils::OpType opType;
@ -51,6 +52,13 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
result << "inPRC=" << inPrc << "_";
result << "outPRC=" << outPrc << "_";
if (!additionalConfig.empty()) {
result << "PluginConf";
for (auto& item : additionalConfig) {
result << "_" << item.first << "=" << item.second.get_type_name();
}
}
result << CPUTestsBase::getTestCaseName(cpuParams);
result << CpuTestWithFusing::getTestCaseName(fusingParams);
@ -63,7 +71,8 @@ void ReduceCPULayerTest::SetUp() {
basicReduceParams basicParams;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::tie(basicParams, cpuParams, fusingParams) = this->GetParam();
std::map<std::string, ov::element::Type> additionalConfig;
std::tie(basicParams, cpuParams, fusingParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
@ -75,7 +84,18 @@ void ReduceCPULayerTest::SetUp() {
std::vector<InputShape> inputShapes;
std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams;
inPrc = outPrc = netPrecision;
if (netPrecision == ElementType::boolean) {
inPrc = outPrc = netPrecision;
} else {
if (additionalConfig[ov::hint::inference_precision.name()] == ov::element::bf16) {
inPrc = outPrc = netPrecision = ElementType::bf16;
} else if (additionalConfig[ov::hint::inference_precision.name()] == ov::element::f16) {
inPrc = outPrc = netPrecision = ElementType::f16;
} else {
inPrc = outPrc = netPrecision;
}
}
configuration.insert(additionalConfig.begin(), additionalConfig.end());
init_input_shapes(inputShapes);
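
Note: the additionalConfig map that now flows into `configuration` is what selects the plugin's inference precision per test instance. At the public API level the same request looks like this sketch (the model path is a placeholder, not from this commit):

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path
    // Each additionalConfig entry becomes a compile-time property; this is
    // the f16 variant the new test axis exercises:
    auto compiled = core.compile_model(
        model, "CPU", ov::hint::inference_precision(ov::element::f16));
    (void)compiled;
}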
@ -144,6 +164,11 @@ void ReduceCPULayerTest::generate_inputs(const std::vector<ngraph::Shape>& targe
for (size_t i = 0; i < tensor.get_size(); ++i) {
rawBlobDataPtr[i] /= 10.f;
}
} else if (netPrecision == ElementType::f16) {
auto *rawBlobDataPtr = static_cast<ngraph::float16 *>(tensor.data());
for (size_t i = 0; i < tensor.get_size(); ++i) {
rawBlobDataPtr[i] /= 10.f;
}
} else if (netPrecision == ElementType::bf16) {
auto* rawBlobDataPtr = static_cast<ngraph::bfloat16*>(tensor.data());
for (size_t i = 0; i < tensor.get_size(); ++i) {
@ -222,10 +247,29 @@ const std::vector<ngraph::helpers::ReductionType>& reductionTypes() {
}
const std::vector<ElementType>& inpOutPrc() {
static const std::vector<ElementType> inpOutPrc = {ElementType::bf16, ElementType::f32};
static const std::vector<ElementType> inpOutPrc = {ElementType::f32};
return inpOutPrc;
}
const std::vector<std::map<std::string, ov::element::Type>> additionalConfig() {
static const std::vector<std::map<std::string, ov::element::Type>> additionalConfig = {
{{ov::hint::inference_precision.name(), ov::element::f32}},
{{ov::hint::inference_precision.name(), ov::element::bf16}},
// ARM doesn't support FP16 for now
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
{{ov::hint::inference_precision.name(), ov::element::f16}},
#endif
};
return additionalConfig;
}
const std::vector<std::map<std::string, ov::element::Type>> additionalConfigFP32() {
static const std::vector<std::map<std::string, ov::element::Type>> additionalConfig = {
{{ov::hint::inference_precision.name(), ov::element::f32}}
};
return additionalConfig;
}
const std::vector<ngraph::helpers::ReductionType>& reductionTypesInt32() {
static const std::vector<ngraph::helpers::ReductionType> reductionTypesInt32 = {
ngraph::helpers::ReductionType::Sum,

View File

@ -29,7 +29,8 @@ typedef std::tuple<
typedef std::tuple<
basicReduceParams,
CPUSpecificParams,
fusingSpecificParams> ReduceLayerCPUTestParamSet;
fusingSpecificParams,
std::map<std::string, ov::element::Type>> ReduceLayerCPUTestParamSet;
class ReduceCPULayerTest : public testing::WithParamInterface<ReduceLayerCPUTestParamSet>,
virtual public SubgraphBaseTest, public CpuTestWithFusing {
@ -52,6 +53,8 @@ const std::vector<std::vector<int>>& axesND();
const std::vector<CommonTestUtils::OpType>& opTypes();
const std::vector<ngraph::helpers::ReductionType>& reductionTypes();
const std::vector<ElementType>& inpOutPrc();
const std::vector<std::map<std::string, ov::element::Type>> additionalConfig();
const std::vector<std::map<std::string, ov::element::Type>> additionalConfigFP32();
const std::vector<ngraph::helpers::ReductionType>& reductionTypesInt32();
} // namespace Reduce

View File

@ -67,7 +67,8 @@ const auto params_OneAxis = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_OneAxis_dynamic = testing::Combine(
testing::Combine(
@ -80,7 +81,8 @@ const auto params_OneAxis_dynamic = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dynamic_3dims)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_4D = testing::Combine(
testing::Combine(
@ -93,7 +95,8 @@ const auto params_MultiAxis_4D = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_4D_dynamic = testing::Combine(
testing::Combine(
@ -106,7 +109,8 @@ const auto params_MultiAxis_4D_dynamic = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dynamic_2dims)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_Int32 = testing::Combine(
testing::Combine(
@ -119,7 +123,8 @@ const auto params_Int32 = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Int32)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_CPU,

View File

@ -132,6 +132,7 @@ const std::vector<fusingSpecificParams> fusingParamsSet_KeepNoDims {
fusingScaleShift
};
/* ================================ 1.1 No fusion - Arithmetic ================================ */
const auto params_OneAxis = testing::Combine(
testing::Combine(
testing::ValuesIn(axes()),
@ -143,7 +144,8 @@ const auto params_OneAxis = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_4D = testing::Combine(
testing::Combine(
@ -156,7 +158,64 @@ const auto params_MultiAxis_4D = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_5D = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_4D_Hybrid = testing::Combine(
testing::Combine(
testing::ValuesIn(axesND()),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(false),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_5D_Hybrid = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(false),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_6D = testing::Combine(
testing::Combine(
testing::ValuesIn(axes6D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::ValuesIn(keepDims()),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_6D_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_Int32 = testing::Combine(
testing::Combine(
@ -169,7 +228,36 @@ const auto params_Int32 = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Int32_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_NHWC_SmallChannel = testing::Combine(
testing::Combine(
testing::ValuesIn(axesHW),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_SmallChannel_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
const auto params_SingleBatch = testing::Combine(
testing::Combine(
testing::ValuesIn(axes()),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_SingleBatch_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_CPU,
@ -185,91 +273,6 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_Int32_CPU,
ReduceCPULayerTest,
params_Int32,
ReduceCPULayerTest::getTestCaseName
);
const auto params_MultiAxis_5D = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::Values(emptyFusingSpec));
const auto params_MultiAxis_4D_Hybrid = testing::Combine(
testing::Combine(
testing::ValuesIn(axesND()),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(false),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
testing::Values(emptyFusingSpec));
const auto params_MultiAxis_5D_Hybrid = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(false),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
testing::Values(emptyFusingSpec));
const auto params_MultiAxis_6D = testing::Combine(
testing::Combine(
testing::ValuesIn(axes6D),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::ValuesIn(keepDims()),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_6D_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
const auto params_NHWC_SmallChannel = testing::Combine(
testing::Combine(
testing::ValuesIn(axesHW),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_SmallChannel_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
testing::Values(emptyFusingSpec));
const auto params_SingleBatch = testing::Combine(
testing::Combine(
testing::ValuesIn(axes()),
testing::Values(CommonTestUtils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_SingleBatch_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_5D_CPU,
ReduceCPULayerTest,
@ -298,6 +301,13 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_Int32_CPU,
ReduceCPULayerTest,
params_Int32,
ReduceCPULayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_NHWC_SmallChannel_CPU,
ReduceCPULayerTest,
@ -324,7 +334,8 @@ const auto params_OneAxis_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_4D_Logical = testing::Combine(
testing::Combine(
@ -337,7 +348,8 @@ const auto params_MultiAxis_4D_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_5D_Logical = testing::Combine(
testing::Combine(
@ -350,7 +362,8 @@ const auto params_MultiAxis_5D_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine(
testing::Combine(
@ -363,7 +376,8 @@ const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
testing::Combine(
@ -376,7 +390,8 @@ const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_6D_Logical = testing::Combine(
testing::Combine(
@ -389,7 +404,8 @@ const auto params_MultiAxis_6D_Logical = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_6D_dyn)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec));
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_Logical_CPU,
@ -445,7 +461,8 @@ const auto params_OneAxis_fusing = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::Values(emptyCPUSpec),
testing::ValuesIn(fusingParamsSet));
testing::ValuesIn(fusingParamsSet),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_4D_fusing = testing::Combine(
testing::Combine(
@ -458,7 +475,8 @@ const auto params_MultiAxis_4D_fusing = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
testing::ValuesIn(fusingParamsSet));
testing::ValuesIn(fusingParamsSet),
testing::ValuesIn(additionalConfig()));
const auto params_MultiAxis_5D_fusing = testing::Combine(
testing::Combine(
@ -471,7 +489,8 @@ const auto params_MultiAxis_5D_fusing = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::ValuesIn(fusingParamsSet));
testing::ValuesIn(fusingParamsSet),
testing::ValuesIn(additionalConfig()));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_fusing_CPU,
@ -506,7 +525,8 @@ const auto params_OneAxis_fusing_KeepNoDims = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::Values(emptyCPUSpec),
testing::ValuesIn(fusingParamsSet_KeepNoDims));
testing::ValuesIn(fusingParamsSet_KeepNoDims),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine(
testing::Combine(
@ -519,7 +539,8 @@ const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
testing::ValuesIn(fusingParamsSet_KeepNoDims));
testing::ValuesIn(fusingParamsSet_KeepNoDims),
testing::ValuesIn(additionalConfigFP32()));
const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
testing::Combine(
@ -532,7 +553,8 @@ const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_dyn)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
testing::ValuesIn(fusingParamsSet_KeepNoDims));
testing::ValuesIn(fusingParamsSet_KeepNoDims),
testing::ValuesIn(additionalConfigFP32()));
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_fusing_KeepNoDims_CPU,