[CPU] Reduce node supports fp16 precision (#18227)
This commit is contained in:
parent
8a49cf14ee
commit
9334ad0790
@ -121,6 +121,13 @@ using ov::with_cpu_x86_avx512_core_vnni;
|
||||
*/
|
||||
using ov::with_cpu_x86_bfloat16;
|
||||
|
||||
/**
|
||||
* @brief Checks whether CPU supports fp16 capability
|
||||
* @ingroup ie_dev_api_system_conf
|
||||
* @return `true` if AVX512_FP16 instructions are available, `false` otherwise
|
||||
*/
|
||||
using ov::with_cpu_x86_avx512_core_fp16;
|
||||
|
||||
/**
|
||||
* @brief Checks whether CPU supports AMX int8 capability
|
||||
* @ingroup ie_dev_api_system_conf
|
||||
|
@ -110,6 +110,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_vnni();
|
||||
*/
|
||||
OPENVINO_RUNTIME_API bool with_cpu_x86_bfloat16();
|
||||
|
||||
/**
|
||||
* @brief Checks whether CPU supports fp16 capability
|
||||
* @ingroup ov_dev_api_system_conf
|
||||
* @return `true` if AVX512_FP16 instructions are available, `false` otherwise
|
||||
*/
|
||||
OPENVINO_RUNTIME_API bool with_cpu_x86_avx512_core_fp16();
|
||||
|
||||
/**
|
||||
* @brief Checks whether CPU supports AMX int8 capability
|
||||
* @ingroup ov_dev_api_system_conf
|
||||
|
@ -72,6 +72,10 @@ bool with_cpu_x86_bfloat16() {
|
||||
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_BF16);
|
||||
}
|
||||
|
||||
bool with_cpu_x86_avx512_core_fp16() {
|
||||
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512_FP16);
|
||||
}
|
||||
|
||||
bool with_cpu_x86_avx512_core_amx_int8() {
|
||||
return get_cpu_info().has(Xbyak::util::Cpu::tAMX_INT8);
|
||||
}
|
||||
@ -107,6 +111,9 @@ bool with_cpu_x86_avx512_core_vnni() {
|
||||
bool with_cpu_x86_bfloat16() {
|
||||
return false;
|
||||
}
|
||||
bool with_cpu_x86_avx512_core_fp16() {
|
||||
return false;
|
||||
}
|
||||
bool with_cpu_x86_avx512_core_amx_int8() {
|
||||
return false;
|
||||
}
|
||||
|
@ -311,7 +311,8 @@ void Graph::Replicate(const CNNNetwork &network) {
|
||||
const auto childEdges = input.second->getChildEdgesAtPort(0);
|
||||
for (size_t i = 0; i < childEdges.size(); i++) {
|
||||
const auto child = childEdges[i]->getChild();
|
||||
if (child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()) != Precision::BF16 &&
|
||||
if (!one_of(child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()),
|
||||
Precision::BF16, Precision::FP16) &&
|
||||
// remove this WA when #78939 is resolved
|
||||
!hasSubgraphConsumers(child))
|
||||
child->setOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum(), precToSet);
|
||||
|
@ -108,7 +108,7 @@ bool ReduceKey::operator==(const ReduceKey &rhs) const {
|
||||
|
||||
// some utility functions
|
||||
static inline bool isFloatCompatible(memory::data_type type) {
|
||||
return memory::data_type::f32 == type || memory::data_type::bf16 == type;
|
||||
return memory::data_type::f32 == type || memory::data_type::bf16 == type || memory::data_type::f16 == type;
|
||||
}
|
||||
|
||||
template <cpu_isa_t isa>
|
||||
@ -590,6 +590,7 @@ private:
|
||||
}
|
||||
break;
|
||||
case memory::data_type::bf16:
|
||||
case memory::data_type::f16:
|
||||
case memory::data_type::s8:
|
||||
case memory::data_type::u8:
|
||||
pack_gathered_vector(vmm_src, vmm_idx, offset, jcp_.src_dt);
|
||||
@ -614,8 +615,9 @@ private:
|
||||
mov(ptr[rsp + i * sizeof(int)], reg_tmp_64.cvt32());
|
||||
break;
|
||||
case memory::data_type::bf16:
|
||||
case memory::data_type::f16:
|
||||
mov(reg_tmp_64.cvt16(), table_idx);
|
||||
mov(ptr[rsp + i * sizeof(ov::intel_cpu::bfloat16_t)], reg_tmp_64.cvt16());
|
||||
mov(ptr[rsp + i * 2], reg_tmp_64.cvt16());
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
case memory::data_type::u8:
|
||||
@ -635,7 +637,10 @@ private:
|
||||
case memory::data_type::bf16:
|
||||
uni_vpmovzxwd(vmm_val, ptr[rsp]);
|
||||
uni_vpslld(vmm_val, vmm_val, 16);
|
||||
break;
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtph2ps(vmm_val, ptr[rsp]);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
uni_vpmovsxbd(vmm_val, ptr[rsp]);
|
||||
break;
|
||||
@ -890,6 +895,9 @@ private:
|
||||
uni_vpmovzxwd(vmm_src, op);
|
||||
uni_vpslld(vmm_src, vmm_src, 16);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtph2ps(vmm_src, op);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
uni_vpmovsxbd(vmm_src, op);
|
||||
break;
|
||||
@ -914,6 +922,9 @@ private:
|
||||
uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
|
||||
uni_vpslld(xmm_src, xmm_src, 16);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtph2ps(xmm_src, op);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
movsx(reg_tmp_32, op);
|
||||
uni_vmovq(xmm_src, reg_tmp_64);
|
||||
@ -948,6 +959,9 @@ private:
|
||||
uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
|
||||
vmovdqu16(op, ymm_dst);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtps2ph(op, vmm_dst, 0x4);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
if (isa == cpu::x64::avx512_core) {
|
||||
vpmovsdb(op, vmm_dst);
|
||||
@ -996,6 +1010,9 @@ private:
|
||||
uni_vpsrld(xmm_dst, xmm_dst, 16);
|
||||
uni_vpextrw(op, xmm_dst, 0x0);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtps2ph(op, xmm_dst, 0x4);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
|
||||
uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
|
||||
@ -1540,6 +1557,9 @@ private:
|
||||
uni_vpmovzxwd(vmm_src, op);
|
||||
uni_vpslld(vmm_src, vmm_src, 16);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtph2ps(vmm_src, op);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
uni_vpmovsxbd(vmm_src, op);
|
||||
break;
|
||||
@ -1564,6 +1584,9 @@ private:
|
||||
uni_vpinsrw(xmm_src, xmm_src, op, 0x0);
|
||||
uni_vpslld(xmm_src, xmm_src, 16);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtph2ps(xmm_src, op);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
movsx(reg_tmp_32, op);
|
||||
uni_vmovq(xmm_src, reg_tmp_64);
|
||||
@ -1598,6 +1621,9 @@ private:
|
||||
uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, {static_cast<size_t>(ymm_dst.getIdx())});
|
||||
vmovdqu16(op, ymm_dst);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtps2ph(op, vmm_dst, 0x4);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
if (isa == cpu::x64::avx512_core) {
|
||||
vpmovsdb(op, vmm_dst);
|
||||
@ -1646,6 +1672,9 @@ private:
|
||||
uni_vpsrld(xmm_dst, xmm_dst, 16);
|
||||
uni_vpextrw(op, xmm_dst, 0x0);
|
||||
break;
|
||||
case memory::data_type::f16:
|
||||
vcvtps2ph(op, xmm_dst, 0x4);
|
||||
break;
|
||||
case memory::data_type::s8:
|
||||
uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst);
|
||||
uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
|
||||
@ -1878,16 +1907,20 @@ void Reduce::initSupportedPrimitiveDescriptors() {
|
||||
|
||||
jit_mode = canApplyJIT(input_prec, output_prec);
|
||||
|
||||
auto is_precision_sensitive_reduce = [](const Algorithm &algorithm) {
|
||||
return algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr &&
|
||||
algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax;
|
||||
};
|
||||
|
||||
if (jit_mode) {
|
||||
// Since in jit mode we use the output memory as an intermediate accumulator for certain reduce modes, we can't use BF16 output precision due to
|
||||
// Since in jit mode we use the output memory as an intermediate accumulator for certain reduce modes, we can't use BF16/FP16 output precision due to
|
||||
// the possible accuracy loss. Therefore, for such modes, we will change the output precision to FP32.
|
||||
if (Precision::BF16 == output_prec) {
|
||||
if (!mayiuse(avx512_core)) {
|
||||
output_prec = Precision::FP32;
|
||||
} else if (algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr &&
|
||||
algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax) {
|
||||
output_prec = Precision::FP32;
|
||||
}
|
||||
if (!mayiuse(avx512_core) || is_precision_sensitive_reduce(algorithm))
|
||||
output_prec = Precision::FP32;
|
||||
} else if (Precision::FP16 == output_prec) {
|
||||
if (!mayiuse(cpu::x64::avx2) || is_precision_sensitive_reduce(algorithm))
|
||||
output_prec = Precision::FP32;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2862,6 +2895,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
|
||||
} else if (output_prec == Precision::BF16) {
|
||||
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<bfloat16_t>(1); });
|
||||
} else if (output_prec == Precision::FP16) {
|
||||
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<ov::float16>(1); });
|
||||
} else if (output_prec == Precision::U8) {
|
||||
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast<uint8_t>(1); });
|
||||
@ -2880,6 +2916,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
|
||||
} else if (output_prec == Precision::BF16) {
|
||||
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<bfloat16_t>::lowest(); });
|
||||
} else if (output_prec == Precision::FP16) {
|
||||
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<ov::float16>::lowest(); });
|
||||
} else if (output_prec == Precision::U8) {
|
||||
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<uint8_t>::min(); });
|
||||
@ -2898,6 +2937,9 @@ inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) {
|
||||
} else if (output_prec == Precision::BF16) {
|
||||
auto out_p = reinterpret_cast<bfloat16_t*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<bfloat16_t>::max(); });
|
||||
} else if (output_prec == Precision::FP16) {
|
||||
auto out_p = reinterpret_cast<ov::float16*>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<ov::float16>::max(); });
|
||||
} else if (output_prec == Precision::U8) {
|
||||
auto out_p = reinterpret_cast<uint8_t *>(out_ptr);
|
||||
parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits<uint8_t>::max(); });
|
||||
@ -3268,6 +3310,7 @@ std::vector<int> Reduce::update_src_dims() {
|
||||
bool Reduce::canApplyJIT(const Precision &input_prec, const Precision &output_prec) const {
|
||||
static const Precision supportedPrecisions[] = {
|
||||
Precision::FP32,
|
||||
Precision::FP16,
|
||||
Precision::BF16,
|
||||
Precision::I32,
|
||||
Precision::I8,
|
||||
|
@ -74,7 +74,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*OVCompiledModelBaseTest.*(CanGetInputsInfoAndCheck|canSetConfigToCompiledModel).*)",
|
||||
R"(.*Behavior.*CorrectConfigCheck.*(canSetConfigAndCheckGetConfig|canSetConfigTwiceAndCheckGetConfig).*CPU_BIND_THREAD=YES.*)",
|
||||
// Issue: 72021 Unreasonable abs_threshold for comparing bf16 results
|
||||
R"(.*smoke_Reduce.*type=(Prod|Min).*netPRC=(BF|bf)16.*)",
|
||||
R"(.*smoke_Reduce.*type=(Prod|Min).*INFERENCE_PRECISION_HINT=(BF|bf)16.*)",
|
||||
// TODO: 56520 Accuracy mismatch
|
||||
R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=(I64|I32).*)",
|
||||
R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=U64.*)",
|
||||
@ -246,6 +246,12 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
retVector.emplace_back(R"(.*Snippets.*MHA.*)");
|
||||
retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
|
||||
}
|
||||
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
|
||||
if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) {
|
||||
// Skip fp16 tests for platforms that don't support fp16 precision
|
||||
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
|
||||
}
|
||||
#endif
|
||||
if (!InferenceEngine::with_cpu_x86_avx512_core_vnni() && !InferenceEngine::with_cpu_x86_avx512_core_amx_int8()) {
|
||||
// MatMul in Snippets uses BRGEMM that supports i8 only on platforms with VNNI or AMX instructions
|
||||
retVector.emplace_back(R"(.*Snippets.*MatMulFQ.*)");
|
||||
|
@ -18,7 +18,8 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
|
||||
basicReduceParams basicParams;
|
||||
CPUSpecificParams cpuParams;
|
||||
fusingSpecificParams fusingParams;
|
||||
std::tie(basicParams, cpuParams, fusingParams) = obj.param;
|
||||
std::map<std::string, ov::element::Type> additionalConfig;
|
||||
std::tie(basicParams, cpuParams, fusingParams, additionalConfig) = obj.param;
|
||||
|
||||
std::vector<int> axes;
|
||||
CommonTestUtils::OpType opType;
|
||||
@ -51,6 +52,13 @@ std::string ReduceCPULayerTest::getTestCaseName(testing::TestParamInfo<ReduceLay
|
||||
result << "inPRC=" << inPrc << "_";
|
||||
result << "outPRC=" << outPrc << "_";
|
||||
|
||||
if (!additionalConfig.empty()) {
|
||||
result << "PluginConf";
|
||||
for (auto& item : additionalConfig) {
|
||||
result << "_" << item.first << "=" << item.second.get_type_name();
|
||||
}
|
||||
}
|
||||
|
||||
result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||
result << CpuTestWithFusing::getTestCaseName(fusingParams);
|
||||
|
||||
@ -63,7 +71,8 @@ void ReduceCPULayerTest::SetUp() {
|
||||
basicReduceParams basicParams;
|
||||
CPUSpecificParams cpuParams;
|
||||
fusingSpecificParams fusingParams;
|
||||
std::tie(basicParams, cpuParams, fusingParams) = this->GetParam();
|
||||
std::map<std::string, ov::element::Type> additionalConfig;
|
||||
std::tie(basicParams, cpuParams, fusingParams, additionalConfig) = this->GetParam();
|
||||
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
|
||||
@ -75,7 +84,18 @@ void ReduceCPULayerTest::SetUp() {
|
||||
std::vector<InputShape> inputShapes;
|
||||
|
||||
std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inputShapes) = basicParams;
|
||||
inPrc = outPrc = netPrecision;
|
||||
if (netPrecision == ElementType::boolean) {
|
||||
inPrc = outPrc = netPrecision;
|
||||
} else {
|
||||
if (additionalConfig[ov::hint::inference_precision.name()] == ov::element::bf16) {
|
||||
inPrc = outPrc = netPrecision = ElementType::bf16;
|
||||
} else if (additionalConfig[ov::hint::inference_precision.name()] == ov::element::f16) {
|
||||
inPrc = outPrc = netPrecision = ElementType::f16;
|
||||
} else {
|
||||
inPrc = outPrc = netPrecision;
|
||||
}
|
||||
}
|
||||
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||
|
||||
init_input_shapes(inputShapes);
|
||||
|
||||
@ -144,6 +164,11 @@ void ReduceCPULayerTest::generate_inputs(const std::vector<ngraph::Shape>& targe
|
||||
for (size_t i = 0; i < tensor.get_size(); ++i) {
|
||||
rawBlobDataPtr[i] /= 10.f;
|
||||
}
|
||||
} else if (netPrecision == ElementType::f16) {
|
||||
auto *rawBlobDataPtr = static_cast<ngraph::float16 *>(tensor.data());
|
||||
for (size_t i = 0; i < tensor.get_size(); ++i) {
|
||||
rawBlobDataPtr[i] /= 10.f;
|
||||
}
|
||||
} else if (netPrecision == ElementType::bf16) {
|
||||
auto* rawBlobDataPtr = static_cast<ngraph::bfloat16*>(tensor.data());
|
||||
for (size_t i = 0; i < tensor.get_size(); ++i) {
|
||||
@ -222,10 +247,29 @@ const std::vector<ngraph::helpers::ReductionType>& reductionTypes() {
|
||||
}
|
||||
|
||||
const std::vector<ElementType>& inpOutPrc() {
|
||||
static const std::vector<ElementType> inpOutPrc = {ElementType::bf16, ElementType::f32};
|
||||
static const std::vector<ElementType> inpOutPrc = {ElementType::f32};
|
||||
return inpOutPrc;
|
||||
}
|
||||
|
||||
const std::vector<std::map<std::string, ov::element::Type>> additionalConfig() {
|
||||
static const std::vector<std::map<std::string, ov::element::Type>> additionalConfig = {
|
||||
{{ov::hint::inference_precision.name(), ov::element::f32}},
|
||||
{{ov::hint::inference_precision.name(), ov::element::bf16}},
|
||||
// ARM doesn't support FP16 for now
|
||||
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
|
||||
{{ov::hint::inference_precision.name(), ov::element::f16}},
|
||||
#endif
|
||||
};
|
||||
return additionalConfig;
|
||||
}
|
||||
|
||||
const std::vector<std::map<std::string, ov::element::Type>> additionalConfigFP32() {
|
||||
static const std::vector<std::map<std::string, ov::element::Type>> additionalConfig = {
|
||||
{{ov::hint::inference_precision.name(), ov::element::f32}}
|
||||
};
|
||||
return additionalConfig;
|
||||
}
|
||||
|
||||
const std::vector<ngraph::helpers::ReductionType>& reductionTypesInt32() {
|
||||
static const std::vector<ngraph::helpers::ReductionType> reductionTypesInt32 = {
|
||||
ngraph::helpers::ReductionType::Sum,
|
||||
|
@ -29,7 +29,8 @@ typedef std::tuple<
|
||||
typedef std::tuple<
|
||||
basicReduceParams,
|
||||
CPUSpecificParams,
|
||||
fusingSpecificParams> ReduceLayerCPUTestParamSet;
|
||||
fusingSpecificParams,
|
||||
std::map<std::string, ov::element::Type>> ReduceLayerCPUTestParamSet;
|
||||
|
||||
class ReduceCPULayerTest : public testing::WithParamInterface<ReduceLayerCPUTestParamSet>,
|
||||
virtual public SubgraphBaseTest, public CpuTestWithFusing {
|
||||
@ -52,6 +53,8 @@ const std::vector<std::vector<int>>& axesND();
|
||||
const std::vector<CommonTestUtils::OpType>& opTypes();
|
||||
const std::vector<ngraph::helpers::ReductionType>& reductionTypes();
|
||||
const std::vector<ElementType>& inpOutPrc();
|
||||
const std::vector<std::map<std::string, ov::element::Type>> additionalConfig();
|
||||
const std::vector<std::map<std::string, ov::element::Type>> additionalConfigFP32();
|
||||
const std::vector<ngraph::helpers::ReductionType>& reductionTypesInt32();
|
||||
|
||||
} // namespace Reduce
|
||||
|
@ -67,7 +67,8 @@ const auto params_OneAxis = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_OneAxis_dynamic = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -80,7 +81,8 @@ const auto params_OneAxis_dynamic = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dynamic_3dims)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_4D = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -93,7 +95,8 @@ const auto params_MultiAxis_4D = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_4D_dynamic = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -106,7 +109,8 @@ const auto params_MultiAxis_4D_dynamic = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dynamic_2dims)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_Int32 = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -119,7 +123,8 @@ const auto params_Int32 = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_Int32)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_OneAxis_CPU,
|
||||
|
@ -132,6 +132,7 @@ const std::vector<fusingSpecificParams> fusingParamsSet_KeepNoDims {
|
||||
fusingScaleShift
|
||||
};
|
||||
|
||||
/* ================================ 1.1 No fusion - Arithmetic ================================ */
|
||||
const auto params_OneAxis = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes()),
|
||||
@ -143,7 +144,8 @@ const auto params_OneAxis = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_4D = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -156,7 +158,64 @@ const auto params_MultiAxis_4D = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_5D = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes5D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_4D_Hybrid = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axesND()),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(false),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_5D_Hybrid = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes5D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(false),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_6D = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes6D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::ValuesIn(keepDims()),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_6D_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_Int32 = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -169,7 +228,36 @@ const auto params_Int32 = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_Int32_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_NHWC_SmallChannel = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axesHW),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_SmallChannel_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_SingleBatch = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes()),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_SingleBatch_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_OneAxis_CPU,
|
||||
@ -185,91 +273,6 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
ReduceCPULayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_Int32_CPU,
|
||||
ReduceCPULayerTest,
|
||||
params_Int32,
|
||||
ReduceCPULayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
const auto params_MultiAxis_5D = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes5D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
const auto params_MultiAxis_4D_Hybrid = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axesND()),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(false),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
const auto params_MultiAxis_5D_Hybrid = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes5D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(false),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
const auto params_MultiAxis_6D = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes6D),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::ValuesIn(keepDims()),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_6D_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
const auto params_NHWC_SmallChannel = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axesHW),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_SmallChannel_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
const auto params_SingleBatch = testing::Combine(
|
||||
testing::Combine(
|
||||
testing::ValuesIn(axes()),
|
||||
testing::Values(CommonTestUtils::OpType::VECTOR),
|
||||
testing::Values(true),
|
||||
testing::ValuesIn(reductionTypes()),
|
||||
testing::ValuesIn(inpOutPrc()),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_SingleBatch_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_MultiAxis_5D_CPU,
|
||||
ReduceCPULayerTest,
|
||||
@ -298,6 +301,13 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
ReduceCPULayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_Int32_CPU,
|
||||
ReduceCPULayerTest,
|
||||
params_Int32,
|
||||
ReduceCPULayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_NHWC_SmallChannel_CPU,
|
||||
ReduceCPULayerTest,
|
||||
@ -324,7 +334,8 @@ const auto params_OneAxis_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_4D_Logical = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -337,7 +348,8 @@ const auto params_MultiAxis_4D_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_5D_Logical = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -350,7 +362,8 @@ const auto params_MultiAxis_5D_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -363,7 +376,8 @@ const auto params_MultiAxis_4D_Hybrid_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -376,7 +390,8 @@ const auto params_MultiAxis_5D_Hybrid_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_6D_Logical = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -389,7 +404,8 @@ const auto params_MultiAxis_6D_Logical = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_6D_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::Values(emptyFusingSpec));
|
||||
testing::Values(emptyFusingSpec),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_OneAxis_Logical_CPU,
|
||||
@ -445,7 +461,8 @@ const auto params_OneAxis_fusing = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::ValuesIn(fusingParamsSet));
|
||||
testing::ValuesIn(fusingParamsSet),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_4D_fusing = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -458,7 +475,8 @@ const auto params_MultiAxis_4D_fusing = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
|
||||
testing::ValuesIn(fusingParamsSet));
|
||||
testing::ValuesIn(fusingParamsSet),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
const auto params_MultiAxis_5D_fusing = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -471,7 +489,8 @@ const auto params_MultiAxis_5D_fusing = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
|
||||
testing::ValuesIn(fusingParamsSet));
|
||||
testing::ValuesIn(fusingParamsSet),
|
||||
testing::ValuesIn(additionalConfig()));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_OneAxis_fusing_CPU,
|
||||
@ -506,7 +525,8 @@ const auto params_OneAxis_fusing_KeepNoDims = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::Values(emptyCPUSpec),
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims));
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -519,7 +539,8 @@ const auto params_MultiAxis_4D_Hybrid_fusing_KeepNoDims = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_4D)),
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims));
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
|
||||
testing::Combine(
|
||||
@ -532,7 +553,8 @@ const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
|
||||
testing::Values(ElementType::undefined),
|
||||
testing::ValuesIn(inputShapes_5D_dyn)),
|
||||
testing::ValuesIn(filterCPUSpecificParams(cpuParams_HybridLayout_5D)),
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims));
|
||||
testing::ValuesIn(fusingParamsSet_KeepNoDims),
|
||||
testing::ValuesIn(additionalConfigFP32()));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Reduce_OneAxis_fusing_KeepNoDims_CPU,
|
||||
|
Loading…
Reference in New Issue
Block a user