[CPU] Interpolate extend to rank 3 input support (#21003)

This commit is contained in:
Chenhu Wang 2023-11-27 15:32:18 +08:00 committed by GitHub
parent eaae00c2ca
commit 6b898fc8d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 255 additions and 41 deletions

View File

@ -1797,7 +1797,7 @@ Interpolate::Interpolate(const std::shared_ptr<ov::Node>& op, const GraphContext
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Interpolate node with name '" + getName() + "'";
dataRank = getInputShapeAtPort(DATA_ID).getRank();
if (const auto interp = std::dynamic_pointer_cast<const ov::opset4::Interpolate>(op)) {
is_version11 = false;
const auto numInputs = inputShapes.size();
@ -1809,7 +1809,6 @@ Interpolate::Interpolate(const std::shared_ptr<ov::Node>& op, const GraphContext
const auto &interpAttr = interp->get_attrs();
const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank();
const auto &interpMode = interpAttr.mode;
if (interpMode == ngInterpMode::NEAREST) {
interpAttrs.mode = InterpolateMode::nearest;
@ -1911,8 +1910,6 @@ Interpolate::Interpolate(const std::shared_ptr<ov::Node>& op, const GraphContext
isAxesSpecified = numInputs != 2;
const auto &interpAttr = interp->get_attrs();
const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank();
const auto &interpMode = interpAttr.mode;
if (interpMode == ngInterpMode::BILINEAR_PILLOW) {
interpAttrs.mode = InterpolateMode::bilinear_pillow;
@ -1984,8 +1981,6 @@ void Interpolate::getSupportedDescriptors() {
if (getChildEdges().empty())
OPENVINO_THROW(errorPrefix, " has incorrect number of output edges");
int dataRank = getInputShapeAtPort(DATA_ID).getRank();
// get pad
for (size_t i = 0; i < interpAttrs.padBegin.size(); i++) {
if (interpAttrs.padBegin[i] != 0) {
@ -2030,9 +2025,16 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
if ((inputPrecision != ov::element::i8) && (inputPrecision != ov::element::u8) && (inputPrecision != ov::element::bf16)) {
inputPrecision = ov::element::f32;
}
if ((inputPrecision == ov::element::bf16) && !mayiuse(avx512_core)) {
inputPrecision = ov::element::f32;
}
// support input with rank<=3 only with float precision and planar layout.
// Jit for avx2(gather is available) and ref for no-avx2 machine.
if (!one_of(dataRank, 4u, 5u)) {
inputPrecision = ov::element::f32;
}
ov::element::Type outputPrecision = inputPrecision;
if (!fusedWith.empty()) {
@ -2117,7 +2119,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
return;
#endif
if (getInputShapeAtPort(DATA_ID).getRank() == 4) {
if (dataRank == 4) {
if (mayiuse(cpu::x64::avx512_core)) {
if (NCHWAsNHWC)
pushDesc(LayoutType::ncsp, jit_avx512, true);
@ -2138,7 +2140,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
pushDesc(LayoutType::ncsp, ref, true);
} else {
const auto &dataMinDims = getInputShapeAtPort(DATA_ID).getMinDims();
bool isBlkApplied = getInputShapeAtPort(DATA_ID).getRank() > 1 && dataMinDims[1] != Shape::UNDEFINED_DIM && dataMinDims[1] > 1;
bool isBlkApplied = dataRank > 1 && dataMinDims[1] != Shape::UNDEFINED_DIM && dataMinDims[1] > 1;
#if defined (OV_CPU_WITH_ACL)
interpAttrs.hasPad = hasPad;
@ -2153,7 +2155,7 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
pushDesc(LayoutType::ncsp, ref, false);
} else {
// blk and by_channel JIT kernel on sse41 or above machine
if (getInputShapeAtPort(DATA_ID).getRank() == 4 || (getInputShapeAtPort(DATA_ID).getRank() == 5 && interpAttrs.mode != InterpolateMode::cubic)) {
if (dataRank == 4 || (dataRank == 5 && interpAttrs.mode != InterpolateMode::cubic)) {
if (mayiuse(cpu::x64::avx512_core)) {
pushDesc(LayoutType::nspc, jit_avx512, false);
if (isBlkApplied)
@ -2169,9 +2171,14 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
}
}
// planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse)
if (mayiuse(cpu::x64::avx2) && inputPrecision == ov::element::f32) {
pushDesc(LayoutType::ncsp, jit_avx2, false);
// planar is only for float precision.
// 1.ref on machine w/o avx2(no fuse)
// 2.JIT kernel for avx2(gatherps is available).(with fuse)
if (inputPrecision == ov::element::f32) {
if (mayiuse(cpu::x64::avx2))
pushDesc(LayoutType::ncsp, jit_avx2, false);
else
pushDesc(LayoutType::ncsp, ref, false);
}
}
}
@ -2435,7 +2442,6 @@ SizeVector Interpolate::getPaddedInputShape(const VectorDims &srcDims,
// if "size" version: scales = shape[target] / shape[input].pad, 1.f for other dims not in axis
// scales is a required input, but the input scales should not be used in the "size" case, because they may have eps added or be a dummy value; recalculate the scales instead.
std::vector<float> Interpolate::getScales(const VectorDims &srcDimPad, const VectorDims &dstDim) {
const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank();
std::vector<float> fullScales(dataRank, 1.f);
const size_t axesRank = axes.size();
for (size_t i = 0; i < axesRank; i++) {
@ -3973,7 +3979,8 @@ bool Interpolate::canFuse(const NodePtr& node) const {
if (!mayiuse(cpu::x64::sse41) ||
interpAttrs.mode == InterpolateMode::linear ||
interpAttrs.mode == InterpolateMode::bilinear_pillow ||
interpAttrs.mode == InterpolateMode::bicubic_pillow) {
interpAttrs.mode == InterpolateMode::bicubic_pillow ||
(!one_of(dataRank, 4u, 5u) && !mayiuse(cpu::x64::avx2))) {
return false;
}

View File

@ -110,6 +110,7 @@ private:
// 2. axis alignment [1,2] to [2,3].
// 3. configure planar layout support and treat it as channel_first layout.
bool NCHWAsNHWC = false;
size_t dataRank = 0;
class InterpolateExecutorBase {
public:

View File

@ -268,26 +268,6 @@ TEST_P(InterpolateLayerCPUTest, CompareWithRefs) {
}
namespace {
/* CPU PARAMS */
std::vector<CPUSpecificParams> filterCPUInfoForDevice() {
std::vector<CPUSpecificParams> resCPUParams;
if (InferenceEngine::with_cpu_x86_avx512f()) {
resCPUParams.push_back(CPUSpecificParams{{nChw16c, x, x, x}, {nChw16c}, {"jit_avx512"}, "jit_avx512"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx512"}, "jit_avx512"});
} else if (InferenceEngine::with_cpu_x86_avx2()) {
resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_avx2"}, "jit_avx2"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx2"}, "jit_avx2"});
resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"jit_avx2"}, "jit_avx2"});
} else if (InferenceEngine::with_cpu_x86_sse42()) {
resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_sse42"}, "jit_sse42"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_sse42"}, "jit_sse42"});
} else {
resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"ref"}, "ref"});
}
return resCPUParams;
}
/* ========== */
const std::vector<ov::op::v11::Interpolate::CoordinateTransformMode> coordinateTransformModes_Smoke = {
ov::op::v11::Interpolate::CoordinateTransformMode::HALF_PIXEL,
ov::op::v11::Interpolate::CoordinateTransformMode::ASYMMETRIC,
@ -344,11 +324,237 @@ std::vector<std::map<std::string, std::string>> filterAdditionalConfig() {
} else {
return {
// default config as a stub for targets without avx512, otherwise all tests with BF16 in their names are skipped
{{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::NO}}
{}
};
}
}
// 3D
std::vector<CPUSpecificParams> filterCPUInfoForDevice3D() {
std::vector<CPUSpecificParams> resCPUParams;
if (InferenceEngine::with_cpu_x86_avx2()) {
resCPUParams.push_back(CPUSpecificParams{{ncw, x, x, x}, {ncw}, {"jit_avx2"}, "jit_avx2"});
} else {
resCPUParams.push_back(CPUSpecificParams{{ncw, x, x, x}, {ncw}, {"ref"}, "ref"});
}
return resCPUParams;
}
// Additional plugin configurations for the rank-3 suites: exactly one entry,
// an empty map (i.e. no extra configuration keys are set).
std::vector<std::map<std::string, std::string>> filterAdditionalConfig3D() {
    std::vector<std::map<std::string, std::string>> configs;
    configs.emplace_back();  // one default-constructed (empty) config map
    return configs;
}
// Pad candidates for the rank-3 smoke suites: a single all-zero entry.
// Passed twice to each ::testing::Combine that uses it (presumably as
// pads-begin and pads-end -- confirm against the test parameter tuple).
const std::vector<std::vector<size_t>> pads3D_smoke = {
{0, 0, 0},
};
// Pad candidates for the rank-3 full suites: a single entry with a pad of 1
// on the last dimension only.
const std::vector<std::vector<size_t>> pads3D_full = {
{0, 0, 1},
};
// Axes list for rank-3 inputs; the only entry lists all three dimensions.
// The shape tables take it via defaultAxes3D.front().
const std::vector<std::vector<int64_t>> defaultAxes3D = {
{0, 1, 2}
};
// Shape configurations for the rank-3 suites, one per shape-calculation mode.
// Both entries share the same InputShape (first member {-1, {2, 20}, -1},
// followed by three concrete rank-3 shapes) and feed the second input as a
// PARAMETER. NOTE(review): the first InputShape member looks like a dynamic
// shape with the middle dimension bounded to [2, 20] -- confirm against the
// InputShape definition.
const std::vector<ShapeParams> shapeParams3D = {
// SCALES mode: one float scale triple per concrete input shape; only the
// last dimension is scaled (1.25 / 1.25 / 1.5).
ShapeParams{
ov::op::v11::Interpolate::ShapeCalcMode::SCALES,
InputShape{{-1, {2, 20}, -1}, {{1, 3, 4}, {2, 4, 6}, {1, 3, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1.f, 1.f, 1.25f}, {1.f, 1.f, 1.25f}, {1.f, 1.f, 1.5f}},
defaultAxes3D.front()
},
// SIZES mode: one target-size triple per concrete input shape; only the
// last dimension differs from the corresponding input shape.
ShapeParams{
ov::op::v11::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, {2, 20}, -1}, {{1, 3, 4}, {2, 4, 6}, {1, 3, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1, 3, 6}, {2, 4, 8}, {1, 3, 6}},
defaultAxes3D.front()
}
};
// Interpolate attribute combinations for the rank-3 NEAREST smoke suites:
// NEAREST mode crossed with the smoke coordinate-transform and nearest modes,
// the antialias values, pads3D_smoke (passed twice -- presumably pads-begin
// and pads-end) and the cube coefficients.
const auto interpolateCasesNN_Smoke_3D = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::NEAREST),
::testing::ValuesIn(coordinateTransformModes_Smoke),
::testing::ValuesIn(nearestModes_Smoke),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(cubeCoefs));
// Rank-3 NEAREST smoke suite: f32 only, shapes from shapeParams3D, CPU params
// from filterCPUInfoForDevice3D(), the general fusing set and the rank-3
// additional config.
INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_Test_3D, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesNN_Smoke_3D,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// Interpolate attribute combinations for the rank-3 NEAREST full suites:
// same structure as the smoke variant but using the *_Full coordinate-transform
// and nearest mode lists and pads3D_full (passed twice -- presumably
// pads-begin and pads-end).
const auto interpolateCasesNN_Full_3D = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::NEAREST),
::testing::ValuesIn(coordinateTransformModes_Full),
::testing::ValuesIn(nearestModes_Full),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(cubeCoefs));
// Rank-3 NEAREST full suite (no smoke_ prefix): f32 only, shapes from
// shapeParams3D, CPU params from filterCPUInfoForDevice3D().
INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_Test_3D, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesNN_Full_3D,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
// Builds the fusing parameter set for the rank-3 fixed-channel suites.
// The per-channel post-op specs (FakeQuantize+Relu and Multiply) are added
// only when with_cpu_x86_avx2() reports support; the empty fusing spec is
// always appended, so the result is never empty.
// Returned by non-const value so the caller can move from the result
// (a top-level const on a by-value return only inhibits moves).
std::vector<fusingSpecificParams> interpolateFusingParamsSet3D_fixed_C() {
    std::vector<fusingSpecificParams> fuseParams;
    if (InferenceEngine::with_cpu_x86_avx2()) {
        fuseParams.push_back(fusingFakeQuantizePerChannelRelu);
        fuseParams.push_back(fusingMultiplyPerChannel);
    }
    fuseParams.push_back(emptyFusingSpec);
    return fuseParams;
}
// Shape configurations for the rank-3 per-channel fusing suites. Every listed
// shape has 3 as its middle dimension, and the second input is a CONSTANT in
// both entries.
const std::vector<ShapeParams> shapeParams3D_fixed_C = {
// SCALES mode: empty first InputShape member with one concrete shape
// {1, 3, 4}; only the last dimension is scaled (1.25).
ShapeParams{
ov::op::v11::Interpolate::ShapeCalcMode::SCALES,
InputShape{{}, {{1, 3, 4}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1.f, 1.f, 1.25f}},
defaultAxes3D.front()
},
// SIZES mode: first InputShape member {-1, 3, -1} with two concrete shapes
// and a single target size {1, 3, 8}.
ShapeParams{
ov::op::v11::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, 3, -1}, {{1, 3, 4}, {1, 3, 6}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1, 3, 8}},
defaultAxes3D.front()
}
};
// Rank-3 NEAREST per-channel fusing smoke suite: smoke attribute combinations
// with the fixed-channel shapes and the fusing set from
// interpolateFusingParamsSet3D_fixed_C(); f32 only.
INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_PerChannelFuse3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesNN_Smoke_3D,
::testing::ValuesIn(shapeParams3D_fixed_C),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet3D_fixed_C()),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// Full (non-smoke) counterpart of the suite above: identical except that it
// uses the full attribute combinations.
INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_PerChannelFuse3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesNN_Full_3D,
::testing::ValuesIn(shapeParams3D_fixed_C),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet3D_fixed_C()),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
#endif
// Interpolate attribute combinations for the rank-3 LINEAR smoke suite:
// LINEAR mode with the smoke coordinate-transform modes, defNearestModes,
// the antialias values, pads3D_smoke (passed twice -- presumably pads-begin
// and pads-end) and the cube coefficients.
const auto interpolateCasesLinear3D_Smoke = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::LINEAR),
::testing::ValuesIn(coordinateTransformModes_Smoke),
::testing::ValuesIn(defNearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(cubeCoefs));
// Full variant of the LINEAR combinations: uses the full coordinate-transform
// mode list and pads3D_full instead of the smoke ones.
const auto interpolateCasesLinear3D_Full = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::LINEAR),
::testing::ValuesIn(coordinateTransformModes_Full),
::testing::ValuesIn(defNearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(cubeCoefs));
// Rank-3 LINEAR smoke suite: f32 only, shapes from shapeParams3D, CPU params
// from filterCPUInfoForDevice3D().
INSTANTIATE_TEST_SUITE_P(smoke_InterpolateLinear_Layout3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesLinear3D_Smoke,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// Rank-3 LINEAR full suite: same as above with the full attribute combinations.
INSTANTIATE_TEST_SUITE_P(InterpolateLinear_Layout3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesLinear3D_Full,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// Interpolate attribute combinations for the rank-3 CUBIC smoke suite:
// CUBIC mode with the smoke coordinate-transform modes, defNearestModes,
// the antialias values, pads3D_smoke (passed twice -- presumably pads-begin
// and pads-end) and the cube coefficients.
const auto interpolateCasesCubic3D_Smoke = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::CUBIC),
::testing::ValuesIn(coordinateTransformModes_Smoke),
::testing::ValuesIn(defNearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(pads3D_smoke),
::testing::ValuesIn(cubeCoefs));
// Full variant of the CUBIC combinations: uses the full coordinate-transform
// mode list and pads3D_full instead of the smoke ones.
const auto interpolateCasesCubic3D_Full = ::testing::Combine(
::testing::Values(ov::op::v11::Interpolate::InterpolateMode::CUBIC),
::testing::ValuesIn(coordinateTransformModes_Full),
::testing::ValuesIn(defNearestModes),
::testing::ValuesIn(antialias),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(pads3D_full),
::testing::ValuesIn(cubeCoefs));
// Rank-3 CUBIC smoke suite: f32 only, shapes from shapeParams3D, CPU params
// from filterCPUInfoForDevice3D().
INSTANTIATE_TEST_SUITE_P(smoke_InterpolateCubic_Layout3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesCubic3D_Smoke,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// Rank-3 CUBIC full suite: same as above with the full attribute combinations.
INSTANTIATE_TEST_SUITE_P(InterpolateCubic_Layout3D_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesCubic3D_Full,
::testing::ValuesIn(shapeParams3D),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterCPUInfoForDevice3D()),
::testing::ValuesIn(interpolateFusingParamsSet),
::testing::ValuesIn(filterAdditionalConfig3D())),
InterpolateLayerCPUTest::getTestCaseName);
// 4D
std::vector<CPUSpecificParams> filterCPUInfoForDevice() {
std::vector<CPUSpecificParams> resCPUParams;
if (InferenceEngine::with_cpu_x86_avx512f()) {
resCPUParams.push_back(CPUSpecificParams{{nChw16c, x, x, x}, {nChw16c}, {"jit_avx512"}, "jit_avx512"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx512"}, "jit_avx512"});
} else if (InferenceEngine::with_cpu_x86_avx2()) {
resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_avx2"}, "jit_avx2"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx2"}, "jit_avx2"});
resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"jit_avx2"}, "jit_avx2"});
} else if (InferenceEngine::with_cpu_x86_sse42()) {
resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_sse42"}, "jit_sse42"});
resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_sse42"}, "jit_sse42"});
} else {
resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"ref"}, "ref"});
}
return resCPUParams;
}
const std::vector<std::vector<size_t>> pads4D = {
{0, 0, 0, 0},
{0, 0, 1, 1},
@ -444,6 +650,12 @@ INSTANTIATE_TEST_SUITE_P(InterpolateNN_Layout_Test, InterpolateLayerCPUTest,
::testing::ValuesIn(filterAdditionalConfig())),
InterpolateLayerCPUTest::getTestCaseName);
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
// Fusing parameter set containing only the two per-channel post-op specs
// (FakeQuantize+Relu and Multiply); unlike the rank-3 helper there is no
// empty-fusing entry here.
const std::vector<fusingSpecificParams> interpolateFusingParamsSet_fixed_C{
fusingFakeQuantizePerChannelRelu,
fusingMultiplyPerChannel,
};
const std::vector<ShapeParams> shapeParams4D_fixed_C = {
ShapeParams{
ov::op::v11::Interpolate::ShapeCalcMode::SCALES,
@ -461,12 +673,6 @@ const std::vector<ShapeParams> shapeParams4D_fixed_C = {
}
};
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
// Fusing parameter set containing only the two per-channel post-op specs
// (FakeQuantize+Relu and Multiply).
const std::vector<fusingSpecificParams> interpolateFusingParamsSet_fixed_C{
fusingFakeQuantizePerChannelRelu,
fusingMultiplyPerChannel,
};
INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_PerChannelFuse_Test, InterpolateLayerCPUTest,
::testing::Combine(
interpolateCasesNN_Smoke,