diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 5371c1c1871..ceaf25a5267 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -1797,7 +1797,7 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Interpolate node with name '" + getName() + "'"; - + dataRank = getInputShapeAtPort(DATA_ID).getRank(); if (const auto interp = std::dynamic_pointer_cast(op)) { is_version11 = false; const auto numInputs = inputShapes.size(); @@ -1809,7 +1809,6 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext const auto &interpAttr = interp->get_attrs(); - const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); const auto &interpMode = interpAttr.mode; if (interpMode == ngInterpMode::NEAREST) { interpAttrs.mode = InterpolateMode::nearest; @@ -1911,8 +1910,6 @@ Interpolate::Interpolate(const std::shared_ptr& op, const GraphContext isAxesSpecified = numInputs != 2; const auto &interpAttr = interp->get_attrs(); - - const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); const auto &interpMode = interpAttr.mode; if (interpMode == ngInterpMode::BILINEAR_PILLOW) { interpAttrs.mode = InterpolateMode::bilinear_pillow; @@ -1984,8 +1981,6 @@ void Interpolate::getSupportedDescriptors() { if (getChildEdges().empty()) OPENVINO_THROW(errorPrefix, " has incorrect number of output edges"); - int dataRank = getInputShapeAtPort(DATA_ID).getRank(); - // get pad for (size_t i = 0; i < interpAttrs.padBegin.size(); i++) { if (interpAttrs.padBegin[i] != 0) { @@ -2030,9 +2025,16 @@ void Interpolate::initSupportedPrimitiveDescriptors() { if ((inputPrecision != ov::element::i8) && (inputPrecision != ov::element::u8) && (inputPrecision != ov::element::bf16)) { inputPrecision = ov::element::f32; } + if ((inputPrecision == ov::element::bf16) && !mayiuse(avx512_core)) { inputPrecision = ov::element::f32; } + + // support input with rank<=3 only with float precision and planar layout. + // Jit for avx2(gather is available) and ref for no-avx2 machine. + if (!one_of(dataRank, 4u, 5u)) { + inputPrecision = ov::element::f32; + } ov::element::Type outputPrecision = inputPrecision; if (!fusedWith.empty()) { @@ -2117,7 +2119,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() { return; #endif - if (getInputShapeAtPort(DATA_ID).getRank() == 4) { + if (dataRank == 4) { if (mayiuse(cpu::x64::avx512_core)) { if (NCHWAsNHWC) pushDesc(LayoutType::ncsp, jit_avx512, true); @@ -2138,7 +2140,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() { pushDesc(LayoutType::ncsp, ref, true); } else { const auto &dataMinDims = getInputShapeAtPort(DATA_ID).getMinDims(); - bool isBlkApplied = getInputShapeAtPort(DATA_ID).getRank() > 1 && dataMinDims[1] != Shape::UNDEFINED_DIM && dataMinDims[1] > 1; + bool isBlkApplied = dataRank > 1 && dataMinDims[1] != Shape::UNDEFINED_DIM && dataMinDims[1] > 1; #if defined (OV_CPU_WITH_ACL) interpAttrs.hasPad = hasPad; @@ -2153,7 +2155,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() { pushDesc(LayoutType::ncsp, ref, false); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getInputShapeAtPort(DATA_ID).getRank() == 4 || (getInputShapeAtPort(DATA_ID).getRank() == 5 && interpAttrs.mode != InterpolateMode::cubic)) { + if (dataRank == 4 || (dataRank == 5 && interpAttrs.mode != InterpolateMode::cubic)) { if (mayiuse(cpu::x64::avx512_core)) { pushDesc(LayoutType::nspc, jit_avx512, false); if (isBlkApplied) @@ -2169,9 +2171,14 @@ void Interpolate::initSupportedPrimitiveDescriptors() { } } - // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) - if (mayiuse(cpu::x64::avx2) && inputPrecision == ov::element::f32) { - pushDesc(LayoutType::ncsp, jit_avx2, false); + // planar is only for float precision. + // 1.ref on machine w/o avx2(no fuse) + // 2.JIT kernel for avx2(gatherps is available).(with fuse) + if (inputPrecision == ov::element::f32) { + if (mayiuse(cpu::x64::avx2)) + pushDesc(LayoutType::ncsp, jit_avx2, false); + else + pushDesc(LayoutType::ncsp, ref, false); } } } @@ -2435,7 +2442,6 @@ SizeVector Interpolate::getPaddedInputShape(const VectorDims &srcDims, // if "size" version: scales = shape[target] / shape[input].pad, 1.f for other dims not in axis // scales is a required input, but should not use input scales when "size" case, which may added eps or is a dummy value, recalculate scales instead. std::vector Interpolate::getScales(const VectorDims &srcDimPad, const VectorDims &dstDim) { - const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); std::vector fullScales(dataRank, 1.f); const size_t axesRank = axes.size(); for (size_t i = 0; i < axesRank; i++) { @@ -3973,7 +3979,8 @@ bool Interpolate::canFuse(const NodePtr& node) const { if (!mayiuse(cpu::x64::sse41) || interpAttrs.mode == InterpolateMode::linear || interpAttrs.mode == InterpolateMode::bilinear_pillow || - interpAttrs.mode == InterpolateMode::bicubic_pillow) { + interpAttrs.mode == InterpolateMode::bicubic_pillow || + (!one_of(dataRank, 4u, 5u) && !mayiuse(cpu::x64::avx2))) { return false; } diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.h b/src/plugins/intel_cpu/src/nodes/interpolate.h index 492ae9d6be9..522ffb3d7c7 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.h +++ b/src/plugins/intel_cpu/src/nodes/interpolate.h @@ -110,6 +110,7 @@ private: // 2. axis alignment [1,2] to [2,3]. // 3. config planar layout support and treated it as channel_first layout. bool NCHWAsNHWC = false; + size_t dataRank = 0; class InterpolateExecutorBase { public: diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/interpolate.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/interpolate.cpp index 3f981101463..bcf1bb5cc1a 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/interpolate.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/interpolate.cpp @@ -268,26 +268,6 @@ TEST_P(InterpolateLayerCPUTest, CompareWithRefs) { } namespace { - -/* CPU PARAMS */ -std::vector filterCPUInfoForDevice() { - std::vector resCPUParams; - if (InferenceEngine::with_cpu_x86_avx512f()) { - resCPUParams.push_back(CPUSpecificParams{{nChw16c, x, x, x}, {nChw16c}, {"jit_avx512"}, "jit_avx512"}); - resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx512"}, "jit_avx512"}); - } else if (InferenceEngine::with_cpu_x86_avx2()) { - resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_avx2"}, "jit_avx2"}); - resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx2"}, "jit_avx2"}); - resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"jit_avx2"}, "jit_avx2"}); - } else if (InferenceEngine::with_cpu_x86_sse42()) { - resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_sse42"}, "jit_sse42"}); - resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_sse42"}, "jit_sse42"}); - } else { - resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"ref"}, "ref"}); - } - return resCPUParams; -} -/* ========== */ const std::vector coordinateTransformModes_Smoke = { ov::op::v11::Interpolate::CoordinateTransformMode::HALF_PIXEL, ov::op::v11::Interpolate::CoordinateTransformMode::ASYMMETRIC, @@ -344,11 +324,237 @@ std::vector> filterAdditionalConfig() { } else { return { // default config as an stub for target without avx512, otherwise all tests with BF16 in its name are skipped - {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::NO}} + {} }; } } +// 3D +std::vector filterCPUInfoForDevice3D() { + std::vector resCPUParams; + if (InferenceEngine::with_cpu_x86_avx2()) { + resCPUParams.push_back(CPUSpecificParams{{ncw, x, x, x}, {ncw}, {"jit_avx2"}, "jit_avx2"}); + } else { + resCPUParams.push_back(CPUSpecificParams{{ncw, x, x, x}, {ncw}, {"ref"}, "ref"}); + } + return resCPUParams; +} + +std::vector> filterAdditionalConfig3D() { + return { + {} + }; +} + +const std::vector> pads3D_smoke = { + {0, 0, 0}, +}; + +const std::vector> pads3D_full = { + {0, 0, 1}, +}; + +const std::vector> defaultAxes3D = { + {0, 1, 2} +}; + +const std::vector shapeParams3D = { + ShapeParams{ + ov::op::v11::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 20}, -1}, {{1, 3, 4}, {2, 4, 6}, {1, 3, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1.f, 1.f, 1.25f}, {1.f, 1.f, 1.25f}, {1.f, 1.f, 1.5f}}, + defaultAxes3D.front() + }, + ShapeParams{ + ov::op::v11::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, {2, 20}, -1}, {{1, 3, 4}, {2, 4, 6}, {1, 3, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1, 3, 6}, {2, 4, 8}, {1, 3, 6}}, + defaultAxes3D.front() + } +}; + +const auto interpolateCasesNN_Smoke_3D = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::NEAREST), + ::testing::ValuesIn(coordinateTransformModes_Smoke), + ::testing::ValuesIn(nearestModes_Smoke), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(cubeCoefs)); +INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_Test_3D, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesNN_Smoke_3D, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +const auto interpolateCasesNN_Full_3D = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::NEAREST), + ::testing::ValuesIn(coordinateTransformModes_Full), + ::testing::ValuesIn(nearestModes_Full), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(cubeCoefs)); +INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_Test_3D, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesNN_Full_3D, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +const std::vector interpolateFusingParamsSet3D_fixed_C() { + std::vector fuseParams; + if (InferenceEngine::with_cpu_x86_avx2()) { + fuseParams.push_back(fusingFakeQuantizePerChannelRelu); + fuseParams.push_back(fusingMultiplyPerChannel); + } + fuseParams.push_back(emptyFusingSpec); + return fuseParams; +} + +const std::vector shapeParams3D_fixed_C = { + ShapeParams{ + ov::op::v11::Interpolate::ShapeCalcMode::SCALES, + InputShape{{}, {{1, 3, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f}}, + defaultAxes3D.front() + }, + ShapeParams{ + ov::op::v11::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, 3, -1}, {{1, 3, 4}, {1, 3, 6}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 3, 8}}, + defaultAxes3D.front() + } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_PerChannelFuse3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesNN_Smoke_3D, + ::testing::ValuesIn(shapeParams3D_fixed_C), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet3D_fixed_C()), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_PerChannelFuse3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesNN_Full_3D, + ::testing::ValuesIn(shapeParams3D_fixed_C), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet3D_fixed_C()), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); +#endif + +const auto interpolateCasesLinear3D_Smoke = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::LINEAR), + ::testing::ValuesIn(coordinateTransformModes_Smoke), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(cubeCoefs)); + +const auto interpolateCasesLinear3D_Full = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::LINEAR), + ::testing::ValuesIn(coordinateTransformModes_Full), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(cubeCoefs)); + +INSTANTIATE_TEST_SUITE_P(smoke_InterpolateLinear_Layout3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesLinear3D_Smoke, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(InterpolateLinear_Layout3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesLinear3D_Full, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +const auto interpolateCasesCubic3D_Smoke = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::CUBIC), + ::testing::ValuesIn(coordinateTransformModes_Smoke), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(pads3D_smoke), + ::testing::ValuesIn(cubeCoefs)); + +const auto interpolateCasesCubic3D_Full = ::testing::Combine( + ::testing::Values(ov::op::v11::Interpolate::InterpolateMode::CUBIC), + ::testing::ValuesIn(coordinateTransformModes_Full), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(pads3D_full), + ::testing::ValuesIn(cubeCoefs)); + +INSTANTIATE_TEST_SUITE_P(smoke_InterpolateCubic_Layout3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesCubic3D_Smoke, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(InterpolateCubic_Layout3D_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesCubic3D_Full, + ::testing::ValuesIn(shapeParams3D), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice3D()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig3D())), + InterpolateLayerCPUTest::getTestCaseName); + +// 4D +std::vector filterCPUInfoForDevice() { + std::vector resCPUParams; + if (InferenceEngine::with_cpu_x86_avx512f()) { + resCPUParams.push_back(CPUSpecificParams{{nChw16c, x, x, x}, {nChw16c}, {"jit_avx512"}, "jit_avx512"}); + resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx512"}, "jit_avx512"}); + } else if (InferenceEngine::with_cpu_x86_avx2()) { + resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_avx2"}, "jit_avx2"}); + resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx2"}, "jit_avx2"}); + resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"jit_avx2"}, "jit_avx2"}); + } else if (InferenceEngine::with_cpu_x86_sse42()) { + resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_sse42"}, "jit_sse42"}); + resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_sse42"}, "jit_sse42"}); + } else { + resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"ref"}, "ref"}); + } + return resCPUParams; +} + const std::vector> pads4D = { {0, 0, 0, 0}, {0, 0, 1, 1}, @@ -444,6 +650,12 @@ INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_Test, InterpolateLayerCPUTest, ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +const std::vector interpolateFusingParamsSet_fixed_C{ + fusingFakeQuantizePerChannelRelu, + fusingMultiplyPerChannel, +}; + const std::vector shapeParams4D_fixed_C = { ShapeParams{ ov::op::v11::Interpolate::ShapeCalcMode::SCALES, @@ -461,12 +673,6 @@ const std::vector shapeParams4D_fixed_C = { } }; -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) -const std::vector interpolateFusingParamsSet_fixed_C{ - fusingFakeQuantizePerChannelRelu, - fusingMultiplyPerChannel, -}; - INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_PerChannelFuse_Test, InterpolateLayerCPUTest, ::testing::Combine( interpolateCasesNN_Smoke,