diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 8a0223a9687..0fda2ab04fb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -1303,6 +1303,19 @@ bool MKLDNNNode::inputShapesDefined() const { return true; } +bool MKLDNNNode::outputShapesDefined() const { + for (size_t i = 0; i < outputShapes.size(); i++) { + if (!getChildEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) { + return false; + } + } + return true; +} + +bool MKLDNNNode::shapesDefined() const { + return inputShapesDefined() && outputShapesDefined(); +} + bool MKLDNNNode::needPrepareParams() const { return inputShapesModified(); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 0747a642e40..6406101a878 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -707,6 +707,8 @@ protected: bool isDynamic = false; bool inputShapesDefined() const; + bool outputShapesDefined() const; + bool shapesDefined() const; void updateLastInputDims(); bool inputShapesModified() const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 8949bf025d4..d58adc21d81 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -25,6 +25,9 @@ #include #include +#include +#include +#include #include "utils/cpu_utils.hpp" using namespace mkldnn; @@ -1605,7 +1608,7 @@ private: // shapeND: n c d h w // blockND: ncdhw cdhw dhw hw w 1 // index : 0 1 2 3 4 5 -inline SizeVector getBlockND(SizeVector& shape) { +inline SizeVector getBlockND(const SizeVector& shape) { int shapeRank = shape.size(); SizeVector blockND(shapeRank + 1, 1); for (int i = shapeRank - 1; i >= 0; i--) { @@ -1644,11 +1647,6 @@ using ngInterpShapeCalcMode = ngraph::opset4::Interpolate::ShapeCalcMode; bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (isDynamicNgraphNode(op)) { - errorMessage = "Doesn't support op with dynamic shapes"; - return false; - } - const auto interp = std::dynamic_pointer_cast(op); if (!interp) { errorMessage = "Only opset4 Interpolate operation is supported"; @@ -1683,7 +1681,7 @@ bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptrget_input_shape(DATA_ID).size(); + const size_t dataRank = interp->get_input_partial_shape(DATA_ID).rank().get_length(); if (dataRank < 1 || dataRank > 5) { errorMessage = "Does not support input tensor of rank : " + std::to_string(dataRank); return false; @@ -1694,8 +1692,9 @@ bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr(interp->get_input_node_shared_ptr(SCALES_ID)) == nullptr) { - errorMessage = "Only const 'scales' input is supported"; + if (!isDynamicNgraphNode(op) && interpShapeCalcMode == ngInterpShapeCalcMode::scales && + !ngraph::is_type(op->get_input_node_ptr(2))) { + errorMessage = "Only const 'scales' input is supported for static shapes"; return false; } @@ -1717,74 +1716,61 @@ MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr const auto interp = std::dynamic_pointer_cast(op); - if (interp->get_input_size() != 3 && interp->get_input_size() != 4) + const auto numInputs = inputShapes.size(); + if (numInputs != 3 && numInputs != 4) IE_THROW() << errorPrefix << " has incorrect number of input edges"; - if (interp->get_output_size() != 1) + if (outputShapes.size() != 1) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - isAxesSpecified = interp->get_input_size() != 3; + isAxesSpecified = numInputs != 3; const auto &interpAttr = interp->get_attrs(); - const size_t dataRank = interp->get_input_shape(DATA_ID).size(); + const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); const auto &interpMode = interpAttr.mode; if (interpMode == ngInterpMode::nearest) { - mode = InterpolateMode::nearest; + interpAttrs.mode = InterpolateMode::nearest; } else if (interpMode == ngInterpMode::linear) { if (dataRank < 5) { - mode = InterpolateMode::linear_onnx; + interpAttrs.mode = InterpolateMode::linear_onnx; } else { - mode = InterpolateMode::linear; + interpAttrs.mode = InterpolateMode::linear; } } else if (interpMode == ngInterpMode::linear_onnx) { - mode = InterpolateMode::linear_onnx; + interpAttrs.mode = InterpolateMode::linear_onnx; } else if (interpMode == ngInterpMode::cubic) { - mode = InterpolateMode::cubic; - } - - switch (dataRank) { - case 1: - case 3: - spatialDimSize = 1; - break; - case 2: - case 4: - spatialDimSize = 2; - break; - case 5: - spatialDimSize = 3; - break; + interpAttrs.mode = InterpolateMode::cubic; } const auto &interpCoordTransMode = interpAttr.coordinate_transformation_mode; if (interpCoordTransMode == ngInterpCoordTransf::half_pixel) { - coordTransMode = InterpolateCoordTransMode::half_pixel; + interpAttrs.coordTransMode = InterpolateCoordTransMode::half_pixel; } else if (interpCoordTransMode == ngInterpCoordTransf::pytorch_half_pixel) { - coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; + interpAttrs.coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; } else if (interpCoordTransMode == ngInterpCoordTransf::asymmetric) { - coordTransMode = InterpolateCoordTransMode::asymmetric; + interpAttrs.coordTransMode = InterpolateCoordTransMode::asymmetric; } else if (interpCoordTransMode == ngInterpCoordTransf::tf_half_pixel_for_nn) { - coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; + interpAttrs.coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; } else if (interpCoordTransMode == ngInterpCoordTransf::align_corners) { - coordTransMode = InterpolateCoordTransMode::align_corners; + interpAttrs.coordTransMode = InterpolateCoordTransMode::align_corners; } - if (mode == InterpolateMode::nearest) { + if (interpAttrs.mode == InterpolateMode::nearest) { const auto &interpNearestMode = interpAttr.nearest_mode; if (interpNearestMode == ngInterpNearMode::round_prefer_floor) { - nearestMode = InterpolateNearestMode::round_prefer_floor; + interpAttrs.nearestMode = InterpolateNearestMode::round_prefer_floor; } else if (interpNearestMode == ngInterpNearMode::round_prefer_ceil) { - nearestMode = InterpolateNearestMode::round_prefer_ceil; + interpAttrs.nearestMode = InterpolateNearestMode::round_prefer_ceil; } else if (interpNearestMode == ngInterpNearMode::floor) { - nearestMode = InterpolateNearestMode::floor; + interpAttrs.nearestMode = InterpolateNearestMode::floor; } else if (interpNearestMode == ngInterpNearMode::ceil) { - nearestMode = InterpolateNearestMode::ceil; + interpAttrs.nearestMode = InterpolateNearestMode::ceil; } else if (interpNearestMode == ngInterpNearMode::simple) { - nearestMode = InterpolateNearestMode::simple; + interpAttrs.nearestMode = InterpolateNearestMode::simple; } - } else if (mode == InterpolateMode::cubic) { - cubeCoeff = static_cast(interpAttr.cube_coeff); + } else if (interpAttrs.mode == InterpolateMode::cubic) { + interpAttrs.cubeCoeff = static_cast(interpAttr.cube_coeff); } - antialias = interpAttr.antialias; + interpAttrs.antialias = interpAttr.antialias; const auto &interpShapeCalcMode = interpAttr.shape_calculation_mode; if (interpShapeCalcMode == ngInterpShapeCalcMode::scales) { @@ -1794,23 +1780,21 @@ MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr } if (interpAttr.pads_begin.empty()) { - padBegin.resize(dataRank, 0); + interpAttrs.padBegin.resize(dataRank, 0); } else { - padBegin.resize(interpAttr.pads_begin.size()); + interpAttrs.padBegin.resize(interpAttr.pads_begin.size()); for (size_t i = 0; i < interpAttr.pads_begin.size(); i++) - padBegin[i] = static_cast(interpAttr.pads_begin[i]); + interpAttrs.padBegin[i] = static_cast(interpAttr.pads_begin[i]); } if (interpAttr.pads_end.empty()) { - padEnd.resize(dataRank, 0); + interpAttrs.padEnd.resize(dataRank, 0); } else { - padEnd.resize(interpAttr.pads_end.size()); + interpAttrs.padEnd.resize(interpAttr.pads_end.size()); for (size_t i = 0; i < interpAttr.pads_end.size(); i++) - padEnd[i] = static_cast(interpAttr.pads_end[i]); + interpAttrs.padEnd[i] = static_cast(interpAttr.pads_end[i]); } - scales = std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(SCALES_ID))->cast_vector(); - if (isAxesSpecified) { axes = std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(AXES_ID))->cast_vector(); } else { @@ -1819,10 +1803,6 @@ MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr axes[i] = i; } } - - if (scales.size() != axes.size()) { - IE_THROW() << errorPrefix << " does not have the same number elements in scales as in axis."; - } } else { IE_THROW(NotImplemented) << errorMessage; } @@ -1835,18 +1815,17 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - srcDim = getInputShapeAtPort(DATA_ID).getStaticDims(); - int dataRank = srcDim.size(); + int dataRank = getInputShapeAtPort(DATA_ID).getRank(); // get pad - for (int i = 0; i < padBegin.size(); i++) { - if (padBegin[i] != 0) { + for (int i = 0; i < interpAttrs.padBegin.size(); i++) { + if (interpAttrs.padBegin[i] != 0) { hasPad = true; break; } } - for (int i = 0; i < padEnd.size(); i++) { - if (padEnd[i] != 0) { + for (int i = 0; i < interpAttrs.padEnd.size(); i++) { + if (interpAttrs.padEnd[i] != 0) { hasPad = true; break; } @@ -1868,21 +1847,15 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { return result; }; - padBegin = correctPad(padBegin, dataRank); - padEnd = correctPad(padEnd, dataRank); - srcDimPad = getPaddedInputShape(); - } else { - srcDimPad = srcDim; + interpAttrs.padBegin = correctPad(interpAttrs.padBegin, dataRank); + interpAttrs.padEnd = correctPad(interpAttrs.padEnd, dataRank); } - dstDim = getOutputShapeAtPort(0).getStaticDims(); } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - setPostOps(attr, true); - Precision inputPrecision = getOriginalInputPrecisionAtPort(DATA_ID); if ((inputPrecision != Precision::I8) && (inputPrecision != Precision::U8) && (inputPrecision != Precision::BF16)) { inputPrecision = Precision::FP32; @@ -1900,12 +1873,6 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { inputPrecision = outputPrecision = Precision::FP32; } - srcDataSize = inputPrecision.size(); - dstDataSize = outputPrecision.size(); - - inputPrec = inputPrecision; - outputPrec = outputPrecision; - NodeConfig config; config.dynBatchSupport = false; if (isAxesSpecified) { @@ -1932,36 +1899,117 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.push_back({config, implDetail}); }; - auto channels = getInputShapeAtPort(DATA_ID).getRank() > 1 ? getInputShapeAtPort(DATA_ID).getStaticDims()[1] : 1; + const auto &dataMinDims = getInputShapeAtPort(DATA_ID).getMinDims(); + bool isBlkApplied = getInputShapeAtPort(DATA_ID).getRank() > 1 && dataMinDims[1] != Shape::UNDEFINED_DIM && dataMinDims[1] > 1; - if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { + if (!mayiuse(cpu::x64::sse41) || interpAttrs.mode == InterpolateMode::linear) { pushDesc(LayoutType::ncsp, ref); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getInputShapeAtPort(DATA_ID).getRank() == 4 || (getInputShapeAtPort(DATA_ID).getRank() == 5 && mode != InterpolateMode::cubic)) { + if (getInputShapeAtPort(DATA_ID).getRank() == 4 || (getInputShapeAtPort(DATA_ID).getRank() == 5 && interpAttrs.mode != InterpolateMode::cubic)) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(LayoutType::nspc, jit_avx512); - if (channels != 1) + if (isBlkApplied) pushDesc(LayoutType::nCsp16c, jit_avx512); } else if (mayiuse(cpu::x64::avx2)) { pushDesc(LayoutType::nspc, jit_avx2); - if (channels != 1) + if (isBlkApplied) pushDesc(LayoutType::nCsp8c, jit_avx2); } else { pushDesc(LayoutType::nspc, jit_sse42); - if (channels != 1) + if (isBlkApplied) pushDesc(LayoutType::nCsp8c, jit_sse42); } } // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) - if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { + if (mayiuse(cpu::x64::avx2) && inputPrecision == Precision::FP32) { pushDesc(LayoutType::ncsp, jit_avx2); } } } -void MKLDNNInterpolateNode::createPrimitive() { +bool MKLDNNInterpolateNode::needShapeInfer() const { + if (MKLDNNNode::inputShapesModified()) { + return true; + } + if (shapeCalcMode == InterpolateShapeCalcMode::scales) { + if (lastScales.empty()) { + return true; + } + const float *scales = reinterpret_cast(getParentEdgesAtPort(SCALES_ID)[0]->getMemory().GetPtr()); + for (size_t i = 0; i < lastScales.size(); i++) { + if (lastScales[i] != scales[i]) { + return true; + } + } + } else { + if (lastSizes.empty()) { + return true; + } + const int32_t *sizes = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().GetPtr()); + for (size_t i = 0; i < lastSizes.size(); i++) { + if (sizes[i] != lastSizes[i]) { + return true; + } + } + } + return false; +} + +std::vector MKLDNNInterpolateNode::shapeInfer() const { + std::vector input_shapes = { + getParentEdgesAtPort(DATA_ID)[0]->getMemory().GetShape().getStaticDims(), + getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().GetShape().getStaticDims(), + getParentEdgesAtPort(SCALES_ID)[0]->getMemory().GetShape().getStaticDims() + }; + + const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? TARGET_SHAPE_ID : SCALES_ID; + const auto &memory = getParentEdgesAtPort(port)[0]->getMemory(); + std::map> input_values = { + {port, std::make_shared(InferenceEngine::details::convertPrecision(memory.getDesc().getPrecision()), + memory.getStaticDims(), memory.GetPtr())} + }; + + if (getParentEdges().size() > AXES_ID) { + const auto &memory = getParentEdgesAtPort(AXES_ID)[0]->getMemory(); + input_shapes.push_back(memory.getStaticDims()); + input_values.insert({3, std::make_shared(InferenceEngine::details::convertPrecision(memory.getDesc().getPrecision()), + memory.getStaticDims(), memory.GetPtr())}); + } + + std::vector output_shapes; + shape_inference(opToShapeInfer.get(), input_shapes, output_shapes, input_values); + + std::vector result(output_shapes.size()); + std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const ov::StaticShape& s){ return s.to_shape(); }); + + return result; +} + +void MKLDNNInterpolateNode::executeDynamicImpl(mkldnn::stream strm) { + execute(strm); + + const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? TARGET_SHAPE_ID : SCALES_ID; + const auto &memory = getParentEdgesAtPort(port)[0]->getMemory(); + if (shapeCalcMode == InterpolateShapeCalcMode::scales) { + const float *scales = reinterpret_cast(memory.GetPtr()); + lastScales.assign(scales, scales + memory.getDesc().getShape().getElementsCount()); + } else { + const int32_t *sizes = reinterpret_cast(memory.GetPtr()); + lastSizes.assign(sizes, sizes + memory.getDesc().getShape().getElementsCount()); + } +} + +bool MKLDNNInterpolateNode::needPrepareParams() const { + return (inputShapesModified() || lastOutputDims != getChildEdgesAtPort(0)[0]->getMemory().getStaticDims()); +} + +void MKLDNNInterpolateNode::prepareParams() { + if (!shapesDefined()) { + IE_THROW() << "Can't prepare params for Interpolate node with name: " << getName() << ", because input/output dims aren't defined"; + } + auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); auto& tsMemPtr = getParentEdgeAt(TARGET_SHAPE_ID)->getMemoryPtr(); @@ -1982,81 +2030,55 @@ void MKLDNNInterpolateNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto jcp = jit_interpolate_config_params(); - jcp.mode = mode; - jcp.src_dt = getParentEdgeAt(0)->getMemory().GetDataType(); - jcp.dst_dt = getChildEdgeAt(0)->getMemory().GetDataType(); - jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); - jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); - jcp.indices_size = sizeof(int); - size_t dimSize = dstDim.size(); - auto srcDimPad5d = to5Dim(srcDimPad); - auto dstDim5d = to5Dim(dstDim); - jcp.OW = dstDim5d[4]; - jcp.OH = dstDim5d[3]; - jcp.OD = dstDim5d[2]; - jcp.IW = srcDimPad5d[4]; - jcp.IH = srcDimPad5d[3]; - jcp.ID = srcDimPad5d[2]; - jcp.spatial_dim_size = spatialDimSize; + const auto &srcDims = srcMemPtr->getStaticDims(); + const auto &dstDims = dstMemPtr->getStaticDims(); + setPostOps(attr, dstDims, true); - if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { - jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || - getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c)) { - jcp.layout = InterpolateLayoutType::block; - } else { - jcp.layout = InterpolateLayoutType::by_channel; - } - - configured_for_layout = jcp.layout; - - if (mode == InterpolateMode::nearest || mode == InterpolateMode::linear_onnx || mode == InterpolateMode::cubic) { - if (jcp.layout != InterpolateLayoutType::planar) { - if (mayiuse(cpu::x64::avx512_common)) { - interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); - } else if (mayiuse(cpu::x64::avx2)) { - interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); - } else if (mayiuse(cpu::x64::sse41)) { - interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); - } - } else { - // gather ISA(for planar JIT kernel) for avx2 and fp32 - if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); - } - } - if (interpolateKernel) - interpolateKernel->create_ker(); - } - - // build indices table - std::vector dataScales = getScales(); - if (dimSize > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) { + std::vector dataScales = getScales(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd), dstDims); + if (getOutputShapeAtPort(0).getRank() > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) { IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)"; } + if ((interpAttrs.mode == InterpolateMode::nearest || interpAttrs.mode == InterpolateMode::linear_onnx || interpAttrs.mode == InterpolateMode::cubic) && + ((interpAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) || + (mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == Precision::FP32))) { + execPtr = std::make_shared(interpAttrs, + srcDims, + dstDims, + dataScales, + attr); + } else { + execPtr = std::make_shared(interpAttrs, + srcDims, + dstDims, + dataScales); + } + lastOutputDims = dstDims; +} - switch (mode) { - case InterpolateMode::nearest: { - buildTblNN(srcDimPad5d, dstDim5d, dataScales, jcp.layout); - break; - } - case InterpolateMode::linear_onnx: { - buildTblLinearOnnx(srcDimPad5d, dstDim5d, dataScales, jcp.layout); - break; - } - case InterpolateMode::linear: { - buildTblLinear(srcDimPad5d, dstDim5d, dataScales, LINEAR_KERNEL, antialias); - break; - } - case InterpolateMode::cubic: { - buildTblCubic(srcDimPad5d, dstDim5d, dataScales, cubeCoeff, jcp.layout); - break; - } - default: { - IE_THROW() << errorPrefix << " does not support interpolate mode:" << mode; - break; - } +void MKLDNNInterpolateNode::createPrimitive() { + auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) + IE_THROW() << errorPrefix << " did not allocate input memory"; + if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) + IE_THROW() << errorPrefix << " did not allocate destination memory"; + + if (dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { + interpAttrs.layout = InterpolateLayoutType::planar; + } else if (dstMemPtr->getDesc().hasLayoutType(LayoutType::nCsp8c) || + dstMemPtr->getDesc().hasLayoutType(LayoutType::nCsp16c)) { + interpAttrs.layout = InterpolateLayoutType::block; + } else { + interpAttrs.layout = InterpolateLayoutType::by_channel; + } + + interpAttrs.inPrc = srcMemPtr->getDesc().getPrecision(); + interpAttrs.outPrc = dstMemPtr->getDesc().getPrecision(); + + if (shapesDefined()) { + if (needPrepareParams()) + prepareParams(); + updateLastInputDims(); } } @@ -2064,11 +2086,412 @@ inline int clipCoord(int pos, int length) { return std::max(static_cast(0), std::min(pos, length - 1)); } +static inline float triangleCoeff(float x) { + return (std::max)(0.0f, 1 - std::abs(x)); +} + +void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights) { + mkldnn::post_ops ops; + + for (auto &node : fusedWith) { + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); + continue; + } + + auto* eltwiseNode = dynamic_cast(node.get()); + if (eltwiseNode) { + constexpr int align = 16; + eltwiseNode->appendPostOps(ops, dims, align); + continue; + } + + IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; + } + + attr.set_post_ops(ops); +} + +SizeVector MKLDNNInterpolateNode::getPaddedInputShape(const VectorDims &srcDims, + const std::vector &padBegin, + const std::vector &padEnd) { + SizeVector paddedShape; + int dataRank = srcDims.size(); + for (int i = 0; i < dataRank; i++) { + paddedShape.push_back(srcDims[i] + padBegin[i] + padEnd[i]); + } + return paddedShape; +} + +// get scales of data rank size +// if "scale" version: set scales with input scales, 1.f for other dims not in axis +// if "size" version: scales = shape[target] / shape[input].pad, 1.f for other dims not in axis +// scales is a required input, but should not use input scales when "size" case, which may added eps that lead to inaccurate result, recalculate scales instead. +std::vector MKLDNNInterpolateNode::getScales(const VectorDims &srcDimPad, const VectorDims &dstDim) { + const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); + const float *scales = reinterpret_cast(getParentEdgesAtPort(SCALES_ID)[0]->getMemory().GetPtr()); + std::vector fullScales(dataRank, 1.f); + const size_t axesRank = axes.size(); + for (size_t i = 0; i < axesRank; i++) { + int axis = axes[i]; + fullScales[axis] = (shapeCalcMode == InterpolateShapeCalcMode::scales) ? scales[i] : + static_cast(dstDim[axis]) / static_cast(srcDimPad[axis]); + } + return fullScales; +} + +void MKLDNNInterpolateNode::execute(mkldnn::stream strm) { + if (!execPtr) { + IE_THROW() << "Can't execute Interpolate node. Primitive didn't created"; + } + + auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t *src_data_origin = reinterpret_cast(srcMemPtr->GetData()); + + const auto &srcDim = srcMemPtr->getStaticDims(); + const auto &dstDim = dstMemPtr->getStaticDims(); + size_t dimSize = srcDim.size(); + auto srcDimPad = execPtr->getSrcDimPad5d(); + + const auto srcDim5d = to5Dim(srcDim); + const auto srcDimPad5d = to5Dim(srcDimPad); + const auto dstDim5d = to5Dim(dstDim); + const auto srcDataSize = srcMemPtr->getDesc().getPrecision().size(); + + const uint8_t *src_data = nullptr; + std::vector srcPadded; + if (hasPad) { + int padB0 = (dimSize > 2) ? interpAttrs.padBegin[0] : 0; + int padB1 = (dimSize > 2) ? interpAttrs.padBegin[1] : 0; + int padB2 = (dimSize == 5) ? interpAttrs.padBegin[dimSize - 3] : 0; + int padB3 = interpAttrs.padBegin[dimSize - 2]; + int padB4 = interpAttrs.padBegin[dimSize - 1]; + + SizeVector inShapeBlock = getBlockND(srcDim5d); + SizeVector inShapePadBlock = getBlockND(srcDimPad5d); + + if (interpAttrs.layout == InterpolateLayoutType::planar) { + srcPadded.resize(inShapePadBlock[0] * srcDataSize, 0); + uint8_t *src_data_pad = static_cast(&srcPadded[0]); + parallel_for4d(srcDim5d[0], srcDim5d[1], srcDim5d[2], srcDim5d[3], [&](int n, int c, int d, int h) { + const uint8_t *src = src_data_origin + (inShapeBlock[1] * n + inShapeBlock[2] * c + inShapeBlock[3] * d + inShapeBlock[4] * h) * srcDataSize; + uint8_t *srcPad = src_data_pad + (inShapePadBlock[1] * (n + padB0) + inShapePadBlock[2] * (c + padB1) + + inShapePadBlock[3] * (d + padB2) + inShapePadBlock[4] * (h + padB3) + padB4) * srcDataSize; + cpu_memcpy(srcPad, src, srcDim5d[4] * srcDataSize); + }); + src_data = src_data_pad; + } else if (interpAttrs.layout == InterpolateLayoutType::by_channel) { + srcPadded.resize(inShapePadBlock[0] * srcDataSize, 0); + uint8_t *src_data_pad = static_cast(&srcPadded[0]); + parallel_for4d(srcDim5d[0], srcDim5d[2], srcDim5d[3], srcDim5d[4], [&](int n, int d, int h, int w) { + const uint8_t *src = src_data_origin + (inShapeBlock[1] * n + + (inShapeBlock[3] * d + inShapeBlock[4] * h + inShapeBlock[5] * w) * srcDim5d[1]) * srcDataSize; + uint8_t *srcPad = src_data_pad + (inShapePadBlock[1] * (n + padB0) + (inShapePadBlock[3] * (d + padB2) + + inShapePadBlock[4] * (h + padB3) + inShapePadBlock[5] * (w + padB4)) * srcDimPad5d[1] + padB1) * srcDataSize; + cpu_memcpy(srcPad, src, srcDim5d[1] * srcDataSize); + }); + src_data = src_data_pad; + } else if (interpAttrs.layout == InterpolateLayoutType::block) { + size_t blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; + size_t CB = div_up(srcDimPad5d[1], blkSize); + size_t eltsTotal = srcDimPad5d[0] * CB * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize; + srcPadded.resize(eltsTotal * srcDataSize, 0x0); + uint8_t *src_data_pad = static_cast(&srcPadded[0]); + if ((srcDim5d[0] != srcDimPad5d[0]) || (srcDim5d[1] != srcDimPad5d[1])) { + IE_THROW() << "Interpolate layer with name '" << getName() << + "' does not support padding on batch and channel dimensions"; + } + parallel_for5d(srcDim5d[0], CB, srcDim5d[2], srcDim5d[3], srcDim5d[4], [&](int n, int cb, int d, int h, int w) { + const uint8_t *src = src_data_origin + (n * CB * srcDim5d[2] * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize + + (cb * srcDim5d[2] * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize + + (d * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize + + (h * srcDim5d[4] * blkSize) * srcDataSize + + (w * blkSize) * srcDataSize; + uint8_t *srcPad = src_data_pad + (n * CB * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize + + (cb * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize + + ((d + padB2) * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize + + ((h + padB3) * srcDimPad5d[4] * blkSize) * srcDataSize + + ((w + padB4) * blkSize) * srcDataSize; + cpu_memcpy(srcPad, src, blkSize * srcDataSize); + }); + src_data = src_data_pad; + } + } else { + src_data = src_data_origin; + } + + execPtr->exec(src_data, dst_data); +} + +// for ndhwc and nCdhw8c[16c] +// input may be f32/bf16/int8, fused->output varies +void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, + int ID, int IH, int IW, int OD, int OH, int OW) { + int *index_d = static_cast(&indexTable[0]); + int *index_h = static_cast(&indexTable[OD]); + int *index_w = static_cast(&indexTable[OD + OH]); + + bool is_nhwc = (configured_for_layout == by_channel); + + for (int b = 0; b < B; b++) { + if (is_nhwc) { + const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * C * b) * srcDataSize; + uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * C * b) * dstDataSize; + std::vector index_w_kernel(OW); + for (int ox = 0; ox < OW; ox++) { + index_w_kernel[ox] = index_w[ox] * C * srcDataSize; + } + parallel_for2d(OD, OH, [&](size_t d, size_t h) { + // kernel for C * OW + uint8_t *out_ptr_dh = out_ptr + (C * OW * OH * d + C * OW * h) * dstDataSize; + const uint8_t *in_ptr_dh = in_ptr + (C * IW * IH * index_d[d] + C * IW * index_h[h]) * srcDataSize; + auto arg = jit_interpolate_call_args(); + arg.dst = out_ptr_dh; + arg.src_ptr[0] = in_ptr_dh; + arg.index = static_cast(&(index_w_kernel[0])); + arg.work_amount = C; + arg.oc_off = 0; + (*interpolateKernel)(&arg); + }); + } else { // for blk + int blk_size = mayiuse(cpu::x64::avx512_common) ? 16 : 8; + int CB = div_up(C, blk_size); + const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * CB * blk_size * b) * srcDataSize; + uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * CB * blk_size * b) * dstDataSize; + std::vector index_w_kernel(OW); + for (int ox = 0; ox < OW; ox++) { + index_w_kernel[ox] = index_w[ox] * blk_size * srcDataSize; + } + parallel_for2d(CB, OD, [&](size_t cb, size_t d) { + uint8_t *out_ptr_cbd = out_ptr + (blk_size * OW * OH * OD * cb + blk_size * OW * OH * d) * dstDataSize; + const uint8_t *in_ptr_cbd = in_ptr + (blk_size * IW * IH * ID * cb + blk_size * IW * IH * index_d[d]) * srcDataSize; + auto arg = jit_interpolate_call_args(); + for (int h = 0; h < OH; h++) { // kernel for blk_size * OW + arg.dst = out_ptr_cbd + blk_size * OW * h * dstDataSize; + arg.src_ptr[0] = in_ptr_cbd + blk_size * IW * index_h[h] * srcDataSize; + arg.index = static_cast(&(index_w_kernel[0])); + arg.work_amount = static_cast(OW); + arg.oc_off = cb * blk_size * sizeof(float); + (*interpolateKernel)(&arg); + } + }); + } + } // batch end +} + +void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, + int OD, int OH, int OW) { + int *index_d = static_cast(&indexTable[0]); + int *index_h = static_cast(&indexTable[OD]); + int *index_w = static_cast(&indexTable[OD + OH]); + + std::vector index_kernel(OH + OW); + // index_h * IW * srcDataSize to reduce and simplify redundant compute + for (int oh = 0; oh < OH; oh++) { + index_kernel[oh] = index_h[oh] * IW * srcDataSize; + } + // index_w * srcDataSize + for (int ow = 0; ow < OW; ow++) { + index_kernel[OH + ow] = index_w[ow] * srcDataSize; + } + + parallel_for3d(B, C, OD, [&](size_t b, size_t c, size_t od) { + const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * C * b + IW * IH * ID * c + IW * IH * index_d[od]) * srcDataSize; + uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * C * b + OW * OH * OD * c + OW * OH * od) * dstDataSize; + + auto arg = jit_interpolate_call_args(); + arg.src_ptr[0] = in_ptr; + arg.dst = out_ptr; + arg.index = static_cast(&index_kernel[0]); // need index_h and index_w in kernel, it's in continous memory so one param + arg.oc_off = static_cast(c * sizeof(float)); + // work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp. + (*interpolateKernel)(&arg); + }); +} + +void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, + int ID, int IH, int IW, int OD, int OH, int OW) { + // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 + // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 + int *index = static_cast(&indexTable[0]); + int eltInGrid = (spatialDimSize > 2) ? MAX_INPUT_INTERPOLATE : ((spatialDimSize > 1) ? 4 : 2); + int scratchLen = rnd_up(eltInGrid * OW * OH * OD, 16); + float *weight = reinterpret_cast(&indexTable[scratchLen]); + + parallel_for2d(B, C, [&](size_t b, size_t c) { + uint8_t *out_ptr_nc = out_ptr_ + (OH * OW * OD * C * b + OH * OW * OD * c) * dstDataSize; + const uint8_t *in_ptr_nc = in_ptr_ + (IH * IW * ID * C * b + IH * IW * ID * c) * srcDataSize; + auto arg = jit_interpolate_call_args(); + arg.src_ptr[0] = in_ptr_nc; + arg.index = static_cast(&index[0]); + arg.weight_ptr[0] = static_cast(&weight[0]); + arg.dst = out_ptr_nc; + arg.work_amount = OW * OH * OD; + arg.oc_off = static_cast(c * sizeof(float)); + (*interpolateKernel)(&arg); + }); +} + +void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, + int OD, int OH, int OW) { + // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD + std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); + std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); + size_t scratchLen = rnd_up(OW + OW + OH + OH + OD + OD, 16); + indexPtr[0] = static_cast(&indexTable[0]); + indexPtr[1] = static_cast(&indexTable[OW]); + indexPtr[2] = static_cast(&indexTable[2 * OW]); + indexPtr[3] = static_cast(&indexTable[2 * OW + OH]); + indexPtr[4] = static_cast(&indexTable[2 * OW + 2 * OH]); + indexPtr[5] = static_cast(&indexTable[2 * OW + 2 * OH + OD]); + + weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); + weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + OW]); + weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); + weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); + weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH]); + weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH + OD]); + + bool isByChannel = (configured_for_layout == by_channel) ? true : false; + + int blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; + int CB = isByChannel ? 1 : div_up(C, blkSize); + int CGatherLen = isByChannel ? C : blkSize; + int workAmount = isByChannel ? C : CB; + // n_CB(1)_d_h_w_8[16](c), () for by-channel + int C0 = OW * CGatherLen; + int C1 = OH * C0; + int C2 = OD * C1; + int C3 = CB * C2; + int I0 = IW * CGatherLen; + int I1 = IH * I0; + int I2 = ID * I1; + int I3 = CB * I2; + parallel_for3d(B, OD, OH, [&](size_t b, size_t d, size_t h) { + uint8_t *out_ptr_ndh = out_ptr_ + (C3 * b + C1 * d + C0 * h) * dstDataSize; + + const uint8_t *in_ptr_n = in_ptr_ + (I3 * b) * srcDataSize; + const uint8_t *in_ptr_nf = in_ptr_n + (indexPtr[4][d] * I1) * srcDataSize; + const uint8_t *in_ptr_nft = in_ptr_nf + (indexPtr[2][h] * I0) * srcDataSize; + const uint8_t *in_ptr_nfb = in_ptr_nf + (indexPtr[3][h] * I0) * srcDataSize; + const uint8_t *in_ptr_ne = in_ptr_n + (indexPtr[5][d] * I1) * srcDataSize; + const uint8_t *in_ptr_net = in_ptr_ne + (indexPtr[2][h] * I0) * srcDataSize; + const uint8_t *in_ptr_neb = in_ptr_ne + (indexPtr[3][h] * I0) * srcDataSize; + auto arg = jit_interpolate_call_args(); + for (int w = 0; w < OW; ++w) { + uint8_t *out_ptr_ndhw = out_ptr_ndh + CGatherLen * w * dstDataSize; + + arg.src_ptr[0] = in_ptr_nft + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[1] = in_ptr_nft + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[2] = in_ptr_nfb + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[3] = in_ptr_nfb + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[4] = in_ptr_net + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[5] = in_ptr_net + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.src_ptr[6] = in_ptr_neb + (indexPtr[0][w] * CGatherLen) * srcDataSize; + arg.src_ptr[7] = in_ptr_neb + (indexPtr[1][w] * CGatherLen) * srcDataSize; + arg.weight_ptr[0] = static_cast(&weightPtr[0][w]); + arg.weight_ptr[1] = static_cast(&weightPtr[1][w]); + arg.weight_ptr[2] = static_cast(&weightPtr[2][h]); + arg.weight_ptr[3] = static_cast(&weightPtr[3][h]); + arg.weight_ptr[4] = static_cast(&weightPtr[4][d]); + arg.weight_ptr[5] = static_cast(&weightPtr[5][d]); + arg.dst = out_ptr_ndhw; + arg.work_amount = workAmount; + arg.oc_off = 0; + (*interpolateKernel)(&arg); + } + }); +} + +void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { + const int idxNum = 1; + int *xOrigin = static_cast(&indexTable[0]); + float *xFactor = reinterpret_cast(&indexTable[OW]); + int *yOrigin = static_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW]); + float *yFactor = reinterpret_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW + OH]); + + int blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; + int CB = div_up(C, blkSize); + int CSize = configured_for_layout == InterpolateLayoutType::by_channel ? C : blkSize * CB; + int CGatherLen = configured_for_layout == InterpolateLayoutType::by_channel ? C : blkSize; + int workAmount = configured_for_layout == InterpolateLayoutType::by_channel ? C : CB; + + parallel_for3d(B, OH, OW, [&](size_t b, size_t h, size_t w) { + uint8_t *out_ptr_nhw = out_ptr_ + (OH * OW * CSize * b + OW * CGatherLen * h + CGatherLen * w) * dstDataSize; + const uint8_t *in_ptr_n = in_ptr_ + (IH * IW * CSize * b) * srcDataSize; + + std::vector kernelIndex(CUBIC_GRID_LEN * CUBIC_GRID_LEN); // 16 address offset to src(batch) or src(CB) + int iy = yOrigin[h]; + int ix = xOrigin[w]; + for (int y = iy - 1, i = 0; y <= iy + 2; y++, i++) { + int yInRange = std::max(0, std::min(y, IH - 1)); + yInRange = yInRange * CGatherLen * IW * srcDataSize; + for (int x = ix - 1, j = 0; x <= ix + 2; x++, j++) { + int xInRange = std::max(0, std::min(x, IW - 1)); + xInRange = yInRange + xInRange * CGatherLen * srcDataSize; + kernelIndex[i * CUBIC_GRID_LEN + j] = xInRange; + } + } + auto arg = jit_interpolate_call_args(); + arg.dst = out_ptr_nhw; + arg.src_ptr[0] = in_ptr_n; + arg.index = static_cast(&kernelIndex[0]); + // 0 for weight_W, 1 for weight_H + arg.weight_ptr[0] = static_cast(&xFactor[w * CUBIC_GRID_LEN]); + arg.weight_ptr[1] = static_cast(&yFactor[h * CUBIC_GRID_LEN]); + + // for by channel, src + step, dst + step, process next step on continuous memory + // for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB + arg.work_amount = workAmount; + arg.oc_off = 0; + (*interpolateKernel)(&arg); + }); +} + +void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { + int tblAdvance = 0; + int *xOrigin = static_cast(&indexTable[tblAdvance]); + tblAdvance += OW; + float *xFactor = reinterpret_cast(&indexTable[tblAdvance]); + tblAdvance += CUBIC_GRID_LEN * OW; + int *yOrigin = static_cast(&indexTable[tblAdvance]); + tblAdvance += OH; + float *yFactor = reinterpret_cast(&indexTable[tblAdvance]); + + tblAdvance += CUBIC_GRID_LEN * OH; + int *sequenceOH = static_cast(&indexTable[tblAdvance]); + tblAdvance += OW * OH; + int *sequenceOW = static_cast(&indexTable[tblAdvance]); + + parallel_for2d(B, C, [&](size_t n, size_t c) { + const uint8_t *in_ptr_nc = in_ptr_ + (IW * IH * C * n + IW * IH * c) * srcDataSize; + uint8_t *out_ptr_nc = out_ptr_ + (OW * OH * C * n + OW * OH * c) * dstDataSize; + + auto arg = jit_interpolate_call_args(); + arg.dst = out_ptr_nc; + arg.src_ptr[0] = in_ptr_nc; + arg.index = xOrigin; + arg.src_ptr[1] = yOrigin; + arg.src_ptr[2] = static_cast(&sequenceOH[0]); + arg.src_ptr[3] = static_cast(&sequenceOW[0]); + arg.weight_ptr[0] = xFactor; + arg.weight_ptr[1] = yFactor; + arg.work_amount = static_cast(OW * OH); + arg.oc_off = static_cast(c * sizeof(float)); + (*interpolateKernel)(&arg); + }); +} + +// ===================================================================================================================== // index layout: // d_0............d_OD-1, h_0..............h_OH-1, w_0................w_OW-1 -void MKLDNNInterpolateNode::buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstDim5d, - std::vector& dataScales, InterpolateLayoutType layout) { - int dimSize = srcDim.size(); +void MKLDNNInterpolateNode::InterpolateExecutor::buildTblNN(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, + const std::vector& dataScales, InterpolateLayoutType layout, InterpolateNearestMode nearestMode) { + const int dimSize = dataRank; float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f; float fy = dataScales[dimSize - 2]; float fx = dataScales[dimSize - 1]; @@ -2081,22 +2504,98 @@ void MKLDNNInterpolateNode::buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstD bool isWDownsample = (fx < 1) ? true : false; for (int oz = 0; oz < OD; oz++) { float iz = coordTransToInput(oz, fz, ID, OD); - indexTable[oz] = nearestRound(iz, isDDownsample); + indexTable[oz] = nearestRound(iz, isDDownsample, nearestMode); indexTable[oz] = clipCoord(indexTable[oz], ID); } for (int oy = 0; oy < OH; oy++) { float iy = coordTransToInput(oy, fy, IH, OH); - indexTable[OD + oy] = nearestRound(iy, isHDownsample); + indexTable[OD + oy] = nearestRound(iy, isHDownsample, nearestMode); indexTable[OD + oy] = clipCoord(indexTable[OD + oy], IH); } for (int ox = 0; ox < OW; ox++) { float ix = coordTransToInput(ox, fx, IW, OW); - indexTable[OD + OH + ox] = nearestRound(ix, isWDownsample); + indexTable[OD + OH + ox] = nearestRound(ix, isWDownsample, nearestMode); indexTable[OD + OH + ox] = clipCoord(indexTable[OD + OH + ox], IW); } } -void MKLDNNInterpolateNode::linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1) { +// scale is float(outShape) / float(inShape) +// strictly consistent with onnx calc manner(div scale, not multiply inverse), given this is done offline +// the slight precison diff can produce obvious wrong value due to "nearest round" behavior for NN mode +float MKLDNNInterpolateNode::InterpolateExecutor::coordTransToInput(int outCoord, float scale, int inShape, int outShape) const { + if (scale == 1.0f || (inShape == outShape)) { + return outCoord; + } + switch (coordTransMode) { + case InterpolateCoordTransMode::half_pixel: { + return (outCoord + 0.5f) / scale - 0.5f; + break; + } + case InterpolateCoordTransMode::pytorch_half_pixel: { + if (outShape > 1) + return (outCoord + 0.5f) / scale - 0.5f; + else + return 0; + break; + } + case InterpolateCoordTransMode::asymmetric: { + return static_cast(outCoord) / scale; + break; + } + case InterpolateCoordTransMode::tf_half_pixel_for_nn: { + return (outCoord + 0.5f) / scale; + break; + } + case InterpolateCoordTransMode::align_corners: { + if (outShape > 1) + return outCoord * (static_cast(inShape - 1) / static_cast(outShape - 1)); + else + return 0; + break; + } + default: { + IE_THROW() << "errorPrefix" << " does not support specified coordinate transformation mode"; + break; + } + } +} + +int MKLDNNInterpolateNode::InterpolateExecutor::nearestRound(float originCoord, bool isDownsample, InterpolateNearestMode nearestMode) const { + switch (nearestMode) { + case InterpolateNearestMode::round_prefer_floor: { + if (originCoord == (static_cast(originCoord) + 0.5f)) + return static_cast(std::floor(originCoord)); + else + return static_cast(std::round(originCoord)); + break; + } + case InterpolateNearestMode::round_prefer_ceil: { + return static_cast(std::round(originCoord)); + break; + } + case InterpolateNearestMode::floor: { + return static_cast(std::floor(originCoord)); + break; + } + case InterpolateNearestMode::ceil: { + return static_cast(std::ceil(originCoord)); + break; + } + case InterpolateNearestMode::simple: { + if (isDownsample) + return static_cast(std::ceil(originCoord)); + else + return static_cast(originCoord); + } + default: { + IE_THROW() << "errorPrefix" << " does not support specified nearest round mode"; + break; + } + } +} + +void MKLDNNInterpolateNode::InterpolateExecutor::linearOnnxCF(int outCoord, float scale, int inShape, int outShape, + int& index0, int& index1, float& weight0, float& weight1) { float inCoord = coordTransToInput(outCoord, scale, inShape, outShape); inCoord = std::max(0.0f, std::min(inCoord, static_cast(inShape - 1))); index0 = std::min(static_cast(inCoord), inShape - 1); @@ -2110,9 +2609,9 @@ void MKLDNNInterpolateNode::linearOnnxCF(int outCoord, float scale, int inShape, } } -void MKLDNNInterpolateNode::buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d, - std::vector& dataScales, InterpolateLayoutType layout) { - int dimSize = srcDim.size(); +void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinearOnnx(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, + const std::vector& dataScales, InterpolateLayoutType layout) { + int dimSize = dataRank; float fz = (spatialDimSize > 2) ? dataScales[dimSize - 3] : 1.f; float fy = (spatialDimSize > 1) ? dataScales[dimSize - 2] : 1.f; float fx = dataScales[dimSize - 1]; @@ -2219,17 +2718,13 @@ void MKLDNNInterpolateNode::buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVect } } -static inline float triangleCoeff(float x) { - return (std::max)(0.0f, 1 - std::abs(x)); -} - // table layout: // wd .........wd, wh............wh, ww.............ww, id...........id, ih............ih, iw..............iw // | | // wh0.....wh_diameter ih0.....ih_diameter -void MKLDNNInterpolateNode::buildTblLinear(SizeVector& srcDimPad5d, SizeVector& dstDim5d, - std::vector& dataScales, int kernel_width, bool antialias) { - int dimSize = srcDim.size(); +void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinear(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, + const std::vector& dataScales, int kernel_width, bool antialias) { + int dimSize = dataRank; float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f; float fy = dataScales[dimSize - 2]; float fx = dataScales[dimSize - 1]; @@ -2304,7 +2799,7 @@ void MKLDNNInterpolateNode::buildTblLinear(SizeVector& srcDimPad5d, SizeVector& } } -std::vector MKLDNNInterpolateNode::getCubicCoeffs(float mantissa, float a) { +std::vector MKLDNNInterpolateNode::InterpolateExecutor::getCubicCoeffs(float mantissa, float a) { float m = std::fabs(mantissa); std::vector coeffs(4, 0.f); @@ -2318,9 +2813,9 @@ std::vector MKLDNNInterpolateNode::getCubicCoeffs(float mantissa, float a // table layout: // OW OW OW OW OW OH OH OH OH OH // x_idx x_weight0 x_weight1 x_weight2 x_weight3 y_idx y_weight0 y_weight1 y_weight2 y_weight3 -void MKLDNNInterpolateNode::buildTblCubic(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, +void MKLDNNInterpolateNode::InterpolateExecutor::buildTblCubic(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, float cubicCoeff, InterpolateLayoutType layout) { - int dimSize = srcDim.size(); + int dimSize = dataRank; float fy = dataScales[dimSize - 2]; float fx = dataScales[dimSize - 1]; int IH = srcDimPad5d[3], IW = srcDimPad5d[4]; @@ -2383,276 +2878,8 @@ void MKLDNNInterpolateNode::buildTblCubic(SizeVector& srcDimPad5d, SizeVector& d } } -void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { - mkldnn::post_ops ops; - - for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); - if (fakeQuantizeNode) { - fakeQuantizeNode->appendPostOps(ops); - continue; - } - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode) { - constexpr int align = 16; - // TODO [DS]: change to shape from memory - eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align); - continue; - } - - IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; - } - - attr.set_post_ops(ops); -} - -SizeVector MKLDNNInterpolateNode::getPaddedInputShape() { - SizeVector paddedShape; - int dataRank = srcDim.size(); - for (int i = 0; i < dataRank; i++) { - paddedShape.push_back(srcDim[i] + padBegin[i] + padEnd[i]); - } - return paddedShape; -} - -// get scales of data rank size -// if "scale" version: set scales with input scales, 1.f for other dims not in axis -// if "size" version: scales = shape[target] / shape[input].pad, 1.f for other dims not in axis -// scales is a required input, but should not use input scales when "size" case, which may added eps that lead to inaccurate result, recalculate scales instead. -std::vector MKLDNNInterpolateNode::getScales() { - int dataRank = srcDim.size(); - std::vector fullScales(dataRank, 1.f); - int axesRank = axes.size(); - for (int i = 0; i < axesRank; i++) { - int axis = axes[i]; - fullScales[axis] = (shapeCalcMode == InterpolateShapeCalcMode::scales) ? scales[i] : - static_cast(dstDim[axis]) / static_cast(srcDimPad[axis]); - } - return fullScales; -} - -void MKLDNNInterpolateNode::execute(mkldnn::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *src_data_origin = reinterpret_cast(srcMemPtr->GetData()); - - size_t dimSize = srcDim.size(); - SizeVector srcDimPad = getPaddedInputShape(); - - auto srcDim5d = to5Dim(srcDim); - auto srcDimPad5d = to5Dim(srcDimPad); - auto dstDim5d = to5Dim(dstDim); - - uint8_t *src_data = nullptr; - std::vector srcPadded; - if (hasPad) { - int padB0 = (dimSize > 2) ? padBegin[0] : 0; - int padB1 = (dimSize > 2) ? padBegin[1] : 0; - int padB2 = (dimSize == 5) ? padBegin[dimSize - 3] : 0; - int padB3 = padBegin[dimSize - 2]; - int padB4 = padBegin[dimSize - 1]; - - SizeVector inShapeBlock = getBlockND(srcDim5d); - SizeVector inShapePadBlock = getBlockND(srcDimPad5d); - - if (configured_for_layout == InterpolateLayoutType::planar) { - srcPadded.resize(inShapePadBlock[0] * srcDataSize, 0); - uint8_t *src_data_pad = static_cast(&srcPadded[0]); - parallel_for4d(srcDim5d[0], srcDim5d[1], srcDim5d[2], srcDim5d[3], [&](int n, int c, int d, int h) { - uint8_t *src = src_data_origin + (inShapeBlock[1] * n + inShapeBlock[2] * c + inShapeBlock[3] * d + inShapeBlock[4] * h) * srcDataSize; - uint8_t *srcPad = src_data_pad + (inShapePadBlock[1] * (n + padB0) + inShapePadBlock[2] * (c + padB1) + - inShapePadBlock[3] * (d + padB2) + inShapePadBlock[4] * (h + padB3) + padB4) * srcDataSize; - cpu_memcpy(srcPad, src, srcDim5d[4] * srcDataSize); - }); - src_data = src_data_pad; - } else if (configured_for_layout == InterpolateLayoutType::by_channel) { - srcPadded.resize(inShapePadBlock[0] * srcDataSize, 0); - uint8_t *src_data_pad = static_cast(&srcPadded[0]); - parallel_for4d(srcDim5d[0], srcDim5d[2], srcDim5d[3], srcDim5d[4], [&](int n, int d, int h, int w) { - uint8_t *src = src_data_origin + (inShapeBlock[1] * n + - (inShapeBlock[3] * d + inShapeBlock[4] * h + inShapeBlock[5] * w) * srcDim5d[1]) * srcDataSize; - uint8_t *srcPad = src_data_pad + (inShapePadBlock[1] * (n + padB0) + (inShapePadBlock[3] * (d + padB2) + - inShapePadBlock[4] * (h + padB3) + inShapePadBlock[5] * (w + padB4)) * srcDimPad5d[1] + padB1) * srcDataSize; - cpu_memcpy(srcPad, src, srcDim5d[1] * srcDataSize); - }); - src_data = src_data_pad; - } else if (configured_for_layout == InterpolateLayoutType::block) { - size_t blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; - size_t CB = div_up(srcDimPad5d[1], blkSize); - size_t eltsTotal = srcDimPad5d[0] * CB * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize; - srcPadded.resize(eltsTotal * srcDataSize, 0x0); - uint8_t *src_data_pad = static_cast(&srcPadded[0]); - if ((srcDim5d[0] != srcDimPad5d[0]) || (srcDim5d[1] != srcDimPad5d[1])) { - IE_THROW() << "Interpolate layer with name '" << getName() << - "' does not support padding on batch and channel dimensions"; - } - parallel_for5d(srcDim5d[0], CB, srcDim5d[2], srcDim5d[3], srcDim5d[4], [&](int n, int cb, int d, int h, int w) { - uint8_t *src = src_data_origin + (n * CB * srcDim5d[2] * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize - + (cb * srcDim5d[2] * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize - + (d * srcDim5d[3] * srcDim5d[4] * blkSize) * srcDataSize - + (h * srcDim5d[4] * blkSize) * srcDataSize - + (w * blkSize) * srcDataSize; - uint8_t *srcPad = src_data_pad + (n * CB * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize - + (cb * srcDimPad5d[2] * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize - + ((d + padB2) * srcDimPad5d[3] * srcDimPad5d[4] * blkSize) * srcDataSize - + ((h + padB3) * srcDimPad5d[4] * blkSize) * srcDataSize - + ((w + padB4) * blkSize) * srcDataSize; - cpu_memcpy(srcPad, src, blkSize * srcDataSize); - }); - src_data = src_data_pad; - } - } else { - src_data = src_data_origin; - } - - size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; - size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; - std::vector dataScales = getScales(); - if (dimSize > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) { - IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)"; - } - - switch (mode) { - case InterpolateMode::nearest: { - if (interpolateKernel) { - if (configured_for_layout == InterpolateLayoutType::planar) { - NNPlanar(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } else { - NNCGathered(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } - } else { - NNRef(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } - break; - } - case InterpolateMode::linear_onnx: { - if (interpolateKernel) { - if (configured_for_layout == InterpolateLayoutType::planar) { - linearOnnxPlanar(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } else { - linearOnnxCGathered(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } - } else { - linearOnnxRef(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW); - } - break; - } - case InterpolateMode::cubic: { - if (interpolateKernel) { - if (configured_for_layout == InterpolateLayoutType::planar) { - cubicPlanar(src_data, dst_data, N, C, IH, IW, OH, OW); - } else { - cubicCGathered(src_data, dst_data, N, C, IH, IW, OH, OW); - } - } else { - cubicRef(src_data, dst_data, N, C, IH, IW, OH, OW); - } - break; - } - case InterpolateMode::linear: { - float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f; - float fy = dataScales[dimSize - 2]; - float fx = dataScales[dimSize - 1]; - - bool isDownsample = (fx < 1.f) || (fy < 1.f) || (fz < 1.f); - int kernel_width = 2; - linearInterpolation(src_data, dst_data, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW, kernel_width, isDownsample && antialias); - break; - } - default: { - IE_THROW() << "Interpolate layer has unsupported interpolate mode: " << mode; - } - } -} - -// for ndhwc and nCdhw8c[16c] -// input may be f32/bf16/int8, fused->output varies -void MKLDNNInterpolateNode::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { - int *index_d = static_cast(&indexTable[0]); - int *index_h = static_cast(&indexTable[OD]); - int *index_w = static_cast(&indexTable[OD + OH]); - - bool is_nhwc = (configured_for_layout == by_channel); - - for (int b = 0; b < B; b++) { - if (is_nhwc) { - const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * C * b) * srcDataSize; - uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * C * b) * dstDataSize; - std::vector index_w_kernel(OW); - for (int ox = 0; ox < OW; ox++) { - index_w_kernel[ox] = index_w[ox] * C * srcDataSize; - } - parallel_for2d(OD, OH, [&](size_t d, size_t h) { - // kernel for C * OW - uint8_t *out_ptr_dh = out_ptr + (C * OW * OH * d + C * OW * h) * dstDataSize; - const uint8_t *in_ptr_dh = in_ptr + (C * IW * IH * index_d[d] + C * IW * index_h[h]) * srcDataSize; - auto arg = jit_interpolate_call_args(); - arg.dst = out_ptr_dh; - arg.src_ptr[0] = in_ptr_dh; - arg.index = static_cast(&(index_w_kernel[0])); - arg.work_amount = C; - arg.oc_off = 0; - (*interpolateKernel)(&arg); - }); - } else { // for blk - int blk_size = mayiuse(cpu::x64::avx512_common) ? 16 : 8; - int CB = div_up(C, blk_size); - const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * CB * blk_size * b) * srcDataSize; - uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * CB * blk_size * b) * dstDataSize; - std::vector index_w_kernel(OW); - for (int ox = 0; ox < OW; ox++) { - index_w_kernel[ox] = index_w[ox] * blk_size * srcDataSize; - } - parallel_for2d(CB, OD, [&](size_t cb, size_t d) { - uint8_t *out_ptr_cbd = out_ptr + (blk_size * OW * OH * OD * cb + blk_size * OW * OH * d) * dstDataSize; - const uint8_t *in_ptr_cbd = in_ptr + (blk_size * IW * IH * ID * cb + blk_size * IW * IH * index_d[d]) * srcDataSize; - auto arg = jit_interpolate_call_args(); - for (int h = 0; h < OH; h++) { // kernel for blk_size * OW - arg.dst = out_ptr_cbd + blk_size * OW * h * dstDataSize; - arg.src_ptr[0] = in_ptr_cbd + blk_size * IW * index_h[h] * srcDataSize; - arg.index = static_cast(&(index_w_kernel[0])); - arg.work_amount = static_cast(OW); - arg.oc_off = cb * blk_size * sizeof(float); - (*interpolateKernel)(&arg); - } - }); - } - } // batch end -} - -void MKLDNNInterpolateNode::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { - int *index_d = static_cast(&indexTable[0]); - int *index_h = static_cast(&indexTable[OD]); - int *index_w = static_cast(&indexTable[OD + OH]); - - std::vector index_kernel(OH + OW); - // index_h * IW * srcDataSize to reduce and simplify redundant compute - for (int oh = 0; oh < OH; oh++) { - index_kernel[oh] = index_h[oh] * IW * srcDataSize; - } - // index_w * srcDataSize - for (int ow = 0; ow < OW; ow++) { - index_kernel[OH + ow] = index_w[ow] * srcDataSize; - } - - parallel_for3d(B, C, OD, [&](size_t b, size_t c, size_t od) { - const uint8_t *in_ptr = in_ptr_ + (IW * IH * ID * C * b + IW * IH * ID * c + IW * IH * index_d[od]) * srcDataSize; - uint8_t *out_ptr = out_ptr_ + (OW * OH * OD * C * b + OW * OH * OD * c + OW * OH * od) * dstDataSize; - - auto arg = jit_interpolate_call_args(); - arg.src_ptr[0] = in_ptr; - arg.dst = out_ptr; - arg.index = static_cast(&index_kernel[0]); // need index_h and index_w in kernel, it's in continous memory so one param - arg.oc_off = static_cast(c * sizeof(float)); - // work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp. - (*interpolateKernel)(&arg); - }); -} - -void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { +void MKLDNNInterpolateNode::InterpolateRefExecutor::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, + int OD, int OH, int OW) { int *index_d = static_cast(&indexTable[0]); int *index_h = static_cast(&indexTable[OD]); int *index_w = static_cast(&indexTable[OD + OH]); @@ -2673,99 +2900,8 @@ void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int }); } -void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { - // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 - // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 - int *index = static_cast(&indexTable[0]); - int eltInGrid = (spatialDimSize > 2) ? MAX_INPUT_INTERPOLATE : ((spatialDimSize > 1) ? 4 : 2); - int scratchLen = rnd_up(eltInGrid * OW * OH * OD, 16); - float *weight = reinterpret_cast(&indexTable[scratchLen]); - - parallel_for2d(B, C, [&](size_t b, size_t c) { - uint8_t *out_ptr_nc = out_ptr_ + (OH * OW * OD * C * b + OH * OW * OD * c) * dstDataSize; - const uint8_t *in_ptr_nc = in_ptr_ + (IH * IW * ID * C * b + IH * IW * ID * c) * srcDataSize; - auto arg = jit_interpolate_call_args(); - arg.src_ptr[0] = in_ptr_nc; - arg.index = static_cast(&index[0]); - arg.weight_ptr[0] = static_cast(&weight[0]); - arg.dst = out_ptr_nc; - arg.work_amount = OW * OH * OD; - arg.oc_off = static_cast(c * sizeof(float)); - (*interpolateKernel)(&arg); - }); -} - -void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { - // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD - std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); - std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); - size_t scratchLen = rnd_up(OW + OW + OH + OH + OD + OD, 16); - indexPtr[0] = static_cast(&indexTable[0]); - indexPtr[1] = static_cast(&indexTable[OW]); - indexPtr[2] = static_cast(&indexTable[2 * OW]); - indexPtr[3] = static_cast(&indexTable[2 * OW + OH]); - indexPtr[4] = static_cast(&indexTable[2 * OW + 2 * OH]); - indexPtr[5] = static_cast(&indexTable[2 * OW + 2 * OH + OD]); - - weightPtr[0] = reinterpret_cast(&indexTable[scratchLen]); - weightPtr[1] = reinterpret_cast(&indexTable[scratchLen + OW]); - weightPtr[2] = reinterpret_cast(&indexTable[scratchLen + 2 * OW]); - weightPtr[3] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + OH]); - weightPtr[4] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH]); - weightPtr[5] = reinterpret_cast(&indexTable[scratchLen + 2 * OW + 2 * OH + OD]); - - bool isByChannel = (configured_for_layout == by_channel) ? true : false; - - int blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; - int CB = isByChannel ? 1 : div_up(C, blkSize); - int CGatherLen = isByChannel ? C : blkSize; - int workAmount = isByChannel ? C : CB; - // n_CB(1)_d_h_w_8[16](c), () for by-channel - int C0 = OW * CGatherLen; - int C1 = OH * C0; - int C2 = OD * C1; - int C3 = CB * C2; - int I0 = IW * CGatherLen; - int I1 = IH * I0; - int I2 = ID * I1; - int I3 = CB * I2; - parallel_for3d(B, OD, OH, [&](size_t b, size_t d, size_t h) { - uint8_t *out_ptr_ndh = out_ptr_ + (C3 * b + C1 * d + C0 * h) * dstDataSize; - - const uint8_t *in_ptr_n = in_ptr_ + (I3 * b) * srcDataSize; - const uint8_t *in_ptr_nf = in_ptr_n + (indexPtr[4][d] * I1) * srcDataSize; - const uint8_t *in_ptr_nft = in_ptr_nf + (indexPtr[2][h] * I0) * srcDataSize; - const uint8_t *in_ptr_nfb = in_ptr_nf + (indexPtr[3][h] * I0) * srcDataSize; - const uint8_t *in_ptr_ne = in_ptr_n + (indexPtr[5][d] * I1) * srcDataSize; - const uint8_t *in_ptr_net = in_ptr_ne + (indexPtr[2][h] * I0) * srcDataSize; - const uint8_t *in_ptr_neb = in_ptr_ne + (indexPtr[3][h] * I0) * srcDataSize; - auto arg = jit_interpolate_call_args(); - for (int w = 0; w < OW; ++w) { - uint8_t *out_ptr_ndhw = out_ptr_ndh + CGatherLen * w * dstDataSize; - - arg.src_ptr[0] = in_ptr_nft + (indexPtr[0][w] * CGatherLen) * srcDataSize; - arg.src_ptr[1] = in_ptr_nft + (indexPtr[1][w] * CGatherLen) * srcDataSize; - arg.src_ptr[2] = in_ptr_nfb + (indexPtr[0][w] * CGatherLen) * srcDataSize; - arg.src_ptr[3] = in_ptr_nfb + (indexPtr[1][w] * CGatherLen) * srcDataSize; - arg.src_ptr[4] = in_ptr_net + (indexPtr[0][w] * CGatherLen) * srcDataSize; - arg.src_ptr[5] = in_ptr_net + (indexPtr[1][w] * CGatherLen) * srcDataSize; - arg.src_ptr[6] = in_ptr_neb + (indexPtr[0][w] * CGatherLen) * srcDataSize; - arg.src_ptr[7] = in_ptr_neb + (indexPtr[1][w] * CGatherLen) * srcDataSize; - arg.weight_ptr[0] = static_cast(&weightPtr[0][w]); - arg.weight_ptr[1] = static_cast(&weightPtr[1][w]); - arg.weight_ptr[2] = static_cast(&weightPtr[2][h]); - arg.weight_ptr[3] = static_cast(&weightPtr[3][h]); - arg.weight_ptr[4] = static_cast(&weightPtr[4][d]); - arg.weight_ptr[5] = static_cast(&weightPtr[5][d]); - arg.dst = out_ptr_ndhw; - arg.work_amount = workAmount; - arg.oc_off = 0; - (*interpolateKernel)(&arg); - } - }); -} - -void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { +void MKLDNNInterpolateNode::InterpolateRefExecutor::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, + int OD, int OH, int OW) { std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, @@ -2862,11 +2998,102 @@ void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_p }); } -void MKLDNNInterpolateNode::linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, +void MKLDNNInterpolateNode::InterpolateRefExecutor::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { + const int idxNum = 1; + int *xOrigin = static_cast(&indexTable[0]); + float *xFactor = reinterpret_cast(&indexTable[OW]); + int *yOrigin = static_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW]); + float *yFactor = reinterpret_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW + OH]); + + const float *in_ptr_f32 = reinterpret_cast(in_ptr_); + float *out_ptr_f32 = reinterpret_cast(out_ptr_); + + parallel_for4d(B, C, OH, OW, [&](size_t n, size_t c, size_t oy, size_t ox) { + const float *in_ptr_nc = in_ptr_f32 + (IW * IH * C * n + IW * IH * c); + float *out_ptr_nc = out_ptr_f32 + (OW * OH * C * n + OW * OH * c); + + int iy = yOrigin[oy]; + int ix = xOrigin[ox]; + + float retY = 0.f; + for (int y = iy - 1, i = 0; y <= iy + 2; y++, i++) { + int yInRange = std::max(0, std::min(y, IH - 1)); + const float *in_ptr_nch = in_ptr_nc + IW * yInRange; + float retX = 0.f; + for (int x = ix - 1, j = 0; x <= ix + 2; x++, j++) { + int xInRange = std::max(0, std::min(x, IW - 1)); + retX += xFactor[ox * CUBIC_GRID_LEN + j] * in_ptr_nch[xInRange]; + } + retY += yFactor[oy * CUBIC_GRID_LEN + i] * retX; + } + out_ptr_nc[oy * OW + ox] = retY; + }); +} + +float MKLDNNInterpolateNode::InterpolateRefExecutor::getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec) { + const uint8_t *baseOffset = base + offset; + switch (prec) { + case Precision::U8: { + return static_cast(*baseOffset); + break; + } + case Precision::I8: { + const int8_t *valuePtr = reinterpret_cast(baseOffset); + return static_cast(*valuePtr); + break; + } + case Precision::BF16: { + const uint16_t *valuePtr = reinterpret_cast(baseOffset); + return bfloat16_t::from_bits(*valuePtr); + break; + } + case Precision::FP32: { + const float *valuePtr = reinterpret_cast(baseOffset); + return *valuePtr; + break; + } + default: { + IE_THROW() << "Interpolate layer does not support precision: " << prec; + break; + } + } +} + +void MKLDNNInterpolateNode::InterpolateRefExecutor::setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec) { + uint8_t *baseOffset = base + offset; + switch (prec) { + case Precision::U8: { + uint8_t data = static_cast(value < 0 ? 0 : value); + cpu_memcpy(baseOffset, &data, 1); + break; + } + case Precision::I8: { + int8_t data = static_cast(value); + cpu_memcpy(baseOffset, &data, 1); + break; + } + case Precision::BF16: { + uint16_t data = bfloat16_t(value).to_bits(); + cpu_memcpy(baseOffset, &data, 2); + break; + } + case Precision::FP32: { + cpu_memcpy(baseOffset, &value, sizeof(float)); + break; + } + default: { + IE_THROW() << "Interpolate layer does not support precision: " << prec; + break; + } + } +} + +void MKLDNNInterpolateNode::InterpolateRefExecutor::linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) { if (IW == OW && IH == OH && ID == OD) { size_t spatialDimSize = IW * IH * ID; - if (fusedWith.empty() && inputPrec == outputPrec) { + // TODO: enable when fusing into interp with linear mode will support + if (/*fusedWith.empty() &&*/ inputPrec == outputPrec) { size_t size = B * C * spatialDimSize * srcDataSize; cpu_memcpy(out_ptr_, in_ptr_, size); } else { @@ -2978,252 +3205,174 @@ void MKLDNNInterpolateNode::linearInterpolation(const uint8_t *in_ptr_, uint8_t }); } -void MKLDNNInterpolateNode::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { - const int idxNum = 1; - int *xOrigin = static_cast(&indexTable[0]); - float *xFactor = reinterpret_cast(&indexTable[OW]); - int *yOrigin = static_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW]); - float *yFactor = reinterpret_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW + OH]); +MKLDNNInterpolateNode::InterpolateExecutor::InterpolateExecutor(const InterpolateAttrs& interpAttrs, + const VectorDims &srcDims, + const VectorDims &dstDims, + const std::vector &dataScales) : + mode(interpAttrs.mode), configured_for_layout(interpAttrs.layout), coordTransMode(interpAttrs.coordTransMode), + inputPrec(interpAttrs.inPrc), outputPrec(interpAttrs.outPrc) { + srcDimPad5d = to5Dim(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd)); + dstDim5d = to5Dim(dstDims); + srcDataSize = interpAttrs.inPrc.size(); + dstDataSize = interpAttrs.outPrc.size(); + dataRank = srcDims.size(); + spatialDimSize = getSpatialDimsNum(dataRank); - int blkSize = mayiuse(cpu::x64::avx512_common) ? 16 : 8; - int CB = div_up(C, blkSize); - int CSize = configured_for_layout == InterpolateLayoutType::by_channel ? C : blkSize * CB; - int CGatherLen = configured_for_layout == InterpolateLayoutType::by_channel ? C : blkSize; - int workAmount = configured_for_layout == InterpolateLayoutType::by_channel ? C : CB; + switch (mode) { + case InterpolateMode::nearest: { + buildTblNN(srcDimPad5d, dstDim5d, dataScales, interpAttrs.layout, interpAttrs.nearestMode); + break; + } + case InterpolateMode::linear_onnx: { + buildTblLinearOnnx(srcDimPad5d, dstDim5d, dataScales, interpAttrs.layout); + break; + } + case InterpolateMode::linear: { + static constexpr int LINEAR_KERNEL = 2; + buildTblLinear(srcDimPad5d, dstDim5d, dataScales, LINEAR_KERNEL, interpAttrs.antialias); + break; + } + case InterpolateMode::cubic: { + buildTblCubic(srcDimPad5d, dstDim5d, dataScales, interpAttrs.cubeCoeff, interpAttrs.layout); + break; + } + default: { + IE_THROW() << "Interpolate executor does not support interpolate mode: " << mode; + break; + } + } +} - parallel_for3d(B, OH, OW, [&](size_t b, size_t h, size_t w) { - uint8_t *out_ptr_nhw = out_ptr_ + (OH * OW * CSize * b + OW * CGatherLen * h + CGatherLen * w) * dstDataSize; - const uint8_t *in_ptr_n = in_ptr_ + (IH * IW * CSize * b) * srcDataSize; +MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const InterpolateAttrs& interpAttrs, + const VectorDims &srcDims, + const VectorDims &dstDims, + const std::vector &dataScales, + const mkldnn::primitive_attr &attr) : + InterpolateExecutor(interpAttrs, srcDims, dstDims, dataScales) { + auto jcp = jit_interpolate_config_params(); + jcp.mode = mode; + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(interpAttrs.inPrc); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(interpAttrs.outPrc); + jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); + jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); + jcp.indices_size = sizeof(int); + jcp.OW = dstDim5d[4]; + jcp.OH = dstDim5d[3]; + jcp.OD = dstDim5d[2]; + jcp.IW = srcDimPad5d[4]; + jcp.IH = srcDimPad5d[3]; + jcp.ID = srcDimPad5d[2]; + jcp.spatial_dim_size = getSpatialDimsNum(srcDims.size()); + jcp.layout = interpAttrs.layout; + if (jcp.layout != InterpolateLayoutType::planar) { + if (mayiuse(cpu::x64::avx512_common)) { + interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); + } else if (mayiuse(cpu::x64::avx2)) { + interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); + } else if (mayiuse(cpu::x64::sse41)) { + interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); + } + } else if (mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == InferenceEngine::Precision::FP32) { + // gather ISA(for planar JIT kernel) for avx2 and fp32 + interpolateKernel.reset(new jit_uni_interpolate_kernel_f32(jcp, *attr.get())); + } else { + IE_THROW() << "Can't create InterpolateJitExecutor"; + } + if (interpolateKernel) { + interpolateKernel->create_ker(); + } else { + IE_THROW() << "Can't compile InterpolateJitExecutor"; + } +} - std::vector kernelIndex(CUBIC_GRID_LEN * CUBIC_GRID_LEN); // 16 address offset to src(batch) or src(CB) - int iy = yOrigin[h]; - int ix = xOrigin[w]; - for (int y = iy - 1, i = 0; y <= iy + 2; y++, i++) { - int yInRange = std::max(0, std::min(y, IH - 1)); - yInRange = yInRange * CGatherLen * IW * srcDataSize; - for (int x = ix - 1, j = 0; x <= ix + 2; x++, j++) { - int xInRange = std::max(0, std::min(x, IW - 1)); - xInRange = yInRange + xInRange * CGatherLen * srcDataSize; - kernelIndex[i * CUBIC_GRID_LEN + j] = xInRange; +void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) { + size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; + size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; + + if (!interpolateKernel) { + IE_THROW() << "Can't execute, kernel for Interpolate node is not compiled"; + } + switch (mode) { + case InterpolateMode::nearest: { + if (configured_for_layout == InterpolateLayoutType::planar) { + NNPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); + } else { + NNCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); } + break; } - auto arg = jit_interpolate_call_args(); - arg.dst = out_ptr_nhw; - arg.src_ptr[0] = in_ptr_n; - arg.index = static_cast(&kernelIndex[0]); - // 0 for weight_W, 1 for weight_H - arg.weight_ptr[0] = static_cast(&xFactor[w * CUBIC_GRID_LEN]); - arg.weight_ptr[1] = static_cast(&yFactor[h * CUBIC_GRID_LEN]); - - // for by channel, src + step, dst + step, process next step on continuous memory - // for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB - arg.work_amount = workAmount; - arg.oc_off = 0; - (*interpolateKernel)(&arg); - }); -} - -void MKLDNNInterpolateNode::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { - int tblAdvance = 0; - int *xOrigin = static_cast(&indexTable[tblAdvance]); - tblAdvance += OW; - float *xFactor = reinterpret_cast(&indexTable[tblAdvance]); - tblAdvance += CUBIC_GRID_LEN * OW; - int *yOrigin = static_cast(&indexTable[tblAdvance]); - tblAdvance += OH; - float *yFactor = reinterpret_cast(&indexTable[tblAdvance]); - - tblAdvance += CUBIC_GRID_LEN * OH; - int *sequenceOH = static_cast(&indexTable[tblAdvance]); - tblAdvance += OW * OH; - int *sequenceOW = static_cast(&indexTable[tblAdvance]); - - parallel_for2d(B, C, [&](size_t n, size_t c) { - const uint8_t *in_ptr_nc = in_ptr_ + (IW * IH * C * n + IW * IH * c) * srcDataSize; - uint8_t *out_ptr_nc = out_ptr_ + (OW * OH * C * n + OW * OH * c) * dstDataSize; - - auto arg = jit_interpolate_call_args(); - arg.dst = out_ptr_nc; - arg.src_ptr[0] = in_ptr_nc; - arg.index = xOrigin; - arg.src_ptr[1] = yOrigin; - arg.src_ptr[2] = static_cast(&sequenceOH[0]); - arg.src_ptr[3] = static_cast(&sequenceOW[0]); - arg.weight_ptr[0] = xFactor; - arg.weight_ptr[1] = yFactor; - arg.work_amount = static_cast(OW * OH); - arg.oc_off = static_cast(c * sizeof(float)); - (*interpolateKernel)(&arg); - }); -} - -void MKLDNNInterpolateNode::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { - const int idxNum = 1; - int *xOrigin = static_cast(&indexTable[0]); - float *xFactor = reinterpret_cast(&indexTable[OW]); - int *yOrigin = static_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW]); - float *yFactor = reinterpret_cast(&indexTable[(CUBIC_GRID_LEN + idxNum) * OW + OH]); - - const float *in_ptr_f32 = reinterpret_cast(in_ptr_); - float *out_ptr_f32 = reinterpret_cast(out_ptr_); - - parallel_for4d(B, C, OH, OW, [&](size_t n, size_t c, size_t oy, size_t ox) { - const float *in_ptr_nc = in_ptr_f32 + (IW * IH * C * n + IW * IH * c); - float *out_ptr_nc = out_ptr_f32 + (OW * OH * C * n + OW * OH * c); - - int iy = yOrigin[oy]; - int ix = xOrigin[ox]; - - float retY = 0.f; - for (int y = iy - 1, i = 0; y <= iy + 2; y++, i++) { - int yInRange = std::max(0, std::min(y, IH - 1)); - const float *in_ptr_nch = in_ptr_nc + IW * yInRange; - float retX = 0.f; - for (int x = ix - 1, j = 0; x <= ix + 2; x++, j++) { - int xInRange = std::max(0, std::min(x, IW - 1)); - retX += xFactor[ox * CUBIC_GRID_LEN + j] * in_ptr_nch[xInRange]; + case InterpolateMode::linear_onnx: { + if (configured_for_layout == InterpolateLayoutType::planar) { + linearOnnxPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); + } else { + linearOnnxCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); } - retY += yFactor[oy * CUBIC_GRID_LEN + i] * retX; - } - out_ptr_nc[oy * OW + ox] = retY; - }); -} - -float MKLDNNInterpolateNode::getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec) { - const uint8_t *baseOffset = base + offset; - switch (prec) { - case Precision::U8: { - return static_cast(*baseOffset); break; } - case Precision::I8: { - const int8_t *valuePtr = reinterpret_cast(baseOffset); - return static_cast(*valuePtr); - break; - } - case Precision::BF16: { - const uint16_t *valuePtr = reinterpret_cast(baseOffset); - return bfloat16_t::from_bits(*valuePtr); - break; - } - case Precision::FP32: { - const float *valuePtr = reinterpret_cast(baseOffset); - return *valuePtr; + case InterpolateMode::cubic: { + if (configured_for_layout == InterpolateLayoutType::planar) { + cubicPlanar(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW); + } else { + cubicCGathered(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW); + } break; } default: { - IE_THROW() << "Interpolate layer does not support precision: " << prec; - break; + IE_THROW() << "InterpolateJitExecutor has unsupported interpolate mode: " << mode; } } } -void MKLDNNInterpolateNode::setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec) { - uint8_t *baseOffset = base + offset; - switch (prec) { - case Precision::U8: { - uint8_t data = static_cast(value < 0 ? 0 : value); - cpu_memcpy(baseOffset, &data, 1); +void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) { + size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; + size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; + + switch (mode) { + case InterpolateMode::nearest: { + NNRef(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); break; } - case Precision::I8: { - int8_t data = static_cast(value); - cpu_memcpy(baseOffset, &data, 1); + case InterpolateMode::linear_onnx: { + linearOnnxRef(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW); break; } - case Precision::BF16: { - uint16_t data = bfloat16_t(value).to_bits(); - cpu_memcpy(baseOffset, &data, 2); + case InterpolateMode::cubic: { + cubicRef(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW); break; } - case Precision::FP32: { - cpu_memcpy(baseOffset, &value, sizeof(float)); + case InterpolateMode::linear: { + float fz = (dataRank == 5) ? dataScales[dataRank - 3] : 1.f; + float fy = dataScales[dataRank - 2]; + float fx = dataScales[dataRank - 1]; + + bool isDownsample = (fx < 1.f) || (fy < 1.f) || (fz < 1.f); + int kernel_width = 2; + linearInterpolation(in_ptr_, out_ptr_, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW, kernel_width, isDownsample && antialias); break; } default: { - IE_THROW() << "Interpolate layer does not support precision: " << prec; - break; + IE_THROW() << "Interpolate layer has unsupported interpolate mode: " << mode; } } } -// scale is float(outShape) / float(inShape) -// strictly consistent with onnx calc manner(div scale, not multiply inverse), given this is done offline -// the slight precison diff can produce obvious wrong value due to "nearest round" behavior for NN mode -inline float MKLDNNInterpolateNode::coordTransToInput(int outCoord, float scale, int inShape, int outShape) { - if (scale == 1.0f || (inShape == outShape)) { - return outCoord; - } - switch (coordTransMode) { - case InterpolateCoordTransMode::half_pixel: { - return (outCoord + 0.5f) / scale - 0.5f; - break; - } - case InterpolateCoordTransMode::pytorch_half_pixel: { - if (outShape > 1) - return (outCoord + 0.5f) / scale - 0.5f; - else - return 0; - break; - } - case InterpolateCoordTransMode::asymmetric: { - return static_cast(outCoord) / scale; - break; - } - case InterpolateCoordTransMode::tf_half_pixel_for_nn: { - return (outCoord + 0.5f) / scale; - break; - } - case InterpolateCoordTransMode::align_corners: { - if (outShape > 1) - return outCoord * (static_cast(inShape - 1) / static_cast(outShape - 1)); - else - return 0; - break; - } - default: { - IE_THROW() << errorPrefix << " does not support specified coordinate transformation mode"; - break; - } - } -} - -inline int MKLDNNInterpolateNode::nearestRound(float originCoord, bool isDownsample) { - switch (nearestMode) { - case InterpolateNearestMode::round_prefer_floor: { - if (originCoord == (static_cast(originCoord) + 0.5f)) - return static_cast(std::floor(originCoord)); - else - return static_cast(std::round(originCoord)); - break; - } - case InterpolateNearestMode::round_prefer_ceil: { - return static_cast(std::round(originCoord)); - break; - } - case InterpolateNearestMode::floor: { - return static_cast(std::floor(originCoord)); - break; - } - case InterpolateNearestMode::ceil: { - return static_cast(std::ceil(originCoord)); - break; - } - case InterpolateNearestMode::simple: { - if (isDownsample) - return static_cast(std::ceil(originCoord)); - else - return static_cast(originCoord); - } - default: { - IE_THROW() << errorPrefix << " does not support specified nearest round mode"; - break; - } +size_t MKLDNNInterpolateNode::getSpatialDimsNum(const Dim rank) { + switch (rank) { + case 1: + case 3: + return 1; + case 2: + case 4: + return 2; + case 5: + return 3; + default: + IE_THROW() << "Can't define number spatial"; } } bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { - if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { + if (!mayiuse(cpu::x64::sse41) || interpAttrs.mode == InterpolateMode::linear) { return false; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h index b39960040a6..5c423711c7f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h @@ -97,6 +97,7 @@ public: void createPrimitive() override; bool created() const override; void execute(mkldnn::stream strm) override; + void executeDynamicImpl(mkldnn::stream strm) override; bool canBeInPlace() const override { return false; } @@ -104,83 +105,141 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + bool needShapeInfer() const override; + std::vector shapeInfer() const override; + bool needPrepareParams() const override; + void prepareParams() override; + private: - // nearest neighbor - void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); - void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); - void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + struct InterpolateAttrs { + InterpolateMode mode; + InterpolateCoordTransMode coordTransMode; + InterpolateNearestMode nearestMode; + bool antialias; + float cubeCoeff; + std::vector padBegin; + std::vector padEnd; + InferenceEngine::Precision inPrc; + InferenceEngine::Precision outPrc; + InterpolateLayoutType layout; + } interpAttrs; - // onnx linear - void linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1); - void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); - void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); - void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + class InterpolateExecutor { + public: + InterpolateExecutor(const InterpolateAttrs& interpAttrs, + const VectorDims &srcDims, + const VectorDims &dstDims, + const std::vector &dataScales); - // cubic - std::vector getCubicCoeffs(float mantissa, float a); - void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); - void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); - void cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); + virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0; + virtual ~InterpolateExecutor() = default; + VectorDims getSrcDimPad5d() const { return srcDimPad5d; } - // linear - void linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, - float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias); + private: + void buildTblNN(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, + InterpolateLayoutType layout, InterpolateNearestMode nearestMode); + void buildTblLinearOnnx(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, + InterpolateLayoutType layout); + void buildTblLinear(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, int kernel_width, + bool antialias); + void buildTblCubic(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, float cubicCoeff, + InterpolateLayoutType layout); - void buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, InterpolateLayoutType layout); - void buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, InterpolateLayoutType layout); - void buildTblLinear(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, int kernel_width, bool antialias); - void buildTblCubic(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector& dataScales, float cubicCoeff, InterpolateLayoutType layout); + float coordTransToInput(int outCoord, float scale, int inShape, int outShape) const; + int nearestRound(float origin, bool isDownsample, InterpolateNearestMode nearestMode) const; + void linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1); + std::vector getCubicCoeffs(float mantissa, float a); - void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false); + protected: + InterpolateMode mode; + InterpolateCoordTransMode coordTransMode; + InterpolateLayoutType configured_for_layout; + VectorDims srcDimPad5d, dstDim5d; + InferenceEngine::Precision inputPrec, outputPrec; + size_t srcDataSize, dstDataSize; + int spatialDimSize; + size_t dataRank; + std::vector indexTable; + }; + std::shared_ptr execPtr = nullptr; - inline float coordTransToInput(int outCoord, float scale, int inShape, int outShape); - inline int nearestRound(float origin, bool isDownsample); - float getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec); - void setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec); + class InterpolateJitExecutor : public InterpolateExecutor { + public: + InterpolateJitExecutor(const InterpolateAttrs& interpAttrs, + const VectorDims &srcDims, + const VectorDims &dstDims, + const std::vector &dataScales, + const mkldnn::primitive_attr &attr); - SizeVector getPaddedInputShape(); - std::vector getScales(); + void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override; - static const size_t DATA_ID = 0; - static const size_t TARGET_SHAPE_ID = 1; - static const size_t SCALES_ID = 2; - static const size_t AXES_ID = 3; - const int LINEAR_KERNEL = 2; - const int CUBIC_GRID_LEN = 4; + private: + // nearest neighbor + void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + + // onnx linear + void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + + // cubic + void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); + void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); + + private: + std::shared_ptr interpolateKernel = nullptr; + }; + + class InterpolateRefExecutor : public InterpolateExecutor { + public: + InterpolateRefExecutor(const InterpolateAttrs& interpAttrs, + const VectorDims &srcDims, + const VectorDims &dstDims, + const std::vector &_dataScales) : dataScales(_dataScales), antialias(interpAttrs.antialias), + InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales) {} + + void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override; + + private: + void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW); + + void cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW); + void linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, + float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias); + + static float getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec); + static void setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec); + + private: + bool antialias; + std::vector dataScales; + }; + + void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false); + + static SizeVector getPaddedInputShape(const VectorDims &srcDims, const std::vector &padBegin, const std::vector &padEnd); + std::vector getScales(const VectorDims &srcDimPad, const VectorDims &dstDim); + static size_t getSpatialDimsNum(const Dim rank); + + static constexpr size_t DATA_ID = 0; + static constexpr size_t TARGET_SHAPE_ID = 1; + static constexpr size_t SCALES_ID = 2; + static constexpr size_t AXES_ID = 3; + static constexpr int CUBIC_GRID_LEN = 4; - InterpolateMode mode; - InterpolateCoordTransMode coordTransMode = InterpolateCoordTransMode::half_pixel; - bool antialias = false; - std::vector padBegin; - std::vector padEnd; bool hasPad = false; - InterpolateNearestMode nearestMode = InterpolateNearestMode::round_prefer_floor; InterpolateShapeCalcMode shapeCalcMode; - float cubeCoeff = -0.75; - bool isAxesSpecified = false; - // axes and scales from buffer, partical size. std::vector axes; - std::vector scales; - // target shape is dst dim, full size. - SizeVector dstDim; - SizeVector srcDim; - SizeVector srcDimPad; - int spatialDimSize = 1; mkldnn::primitive_attr attr; - std::vector PostOpsIntBlobMemory; - InferenceEngine::Precision inputPrec, outputPrec; - size_t srcDataSize = 0; - size_t dstDataSize = 0; + std::vector lastScales; + std::vector lastSizes; - InterpolateLayoutType configured_for_layout = InterpolateLayoutType::planar; - - std::vector indexTable; - - std::shared_ptr interpolateKernel = nullptr; + VectorDims lastOutputDims; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index 104b6f65e0b..1d91199f95a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ -28,7 +28,7 @@ public: return false; } - void prepareParams() override;; + void prepareParams() override; void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index 7392cbdb530..f3bbbe6d1b4 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -2,35 +2,95 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" #include "test_utils/fusing_test_utils.hpp" +#include "functional_test_utils/ov_tensor_utils.hpp" +#include "openvino/core/preprocess/pre_post_process.hpp" -using namespace InferenceEngine; +using namespace ov::test; using namespace CPUTestUtils; +using ngraph::helpers::operator<<; namespace CPULayerTestsDefinitions { -typedef std::tuple< - LayerTestsDefinitions::InterpolateLayerTestParams, - CPUSpecificParams, - fusingSpecificParams, - std::map - > InterpolateLayerCPUTestParamsSet; +using InterpolateSpecificParams = std::tuple, // PadBegin + std::vector, // PadEnd + double>; // Cube coef + +using ShapeParams = std::tuple>, // scales or sizes values + std::vector>; // axes + +using InterpolateLayerCPUTestParamsSet = std::tuple>; class InterpolateLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { + virtual public SubgraphBaseTest, public CpuTestWithFusing { public: static std::string getTestCaseName(testing::TestParamInfo obj) { - LayerTestsDefinitions::InterpolateLayerTestParams basicParamsSet; + InterpolateSpecificParams specificParams; + ShapeParams shapeParams; + ElementType prec; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + std::tie(specificParams, shapeParams, prec, cpuParams, fusingParams, additionalConfig) = obj.param; + + ngraph::op::v4::Interpolate::InterpolateMode mode; + ngraph::op::v4::Interpolate::CoordinateTransformMode transfMode; + ngraph::op::v4::Interpolate::NearestMode nearMode; + bool antiAlias; + std::vector padBegin; + std::vector padEnd; + double cubeCoef; + std::tie(mode, transfMode, nearMode, antiAlias, padBegin, padEnd, cubeCoef) = specificParams; + + ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; + InputShape inputShapes; + ngraph::helpers::InputLayerType shapeInputType; + std::vector> shapeDataForInput; + std::vector axes; + std::tie(shapeCalcMode, inputShapes, shapeInputType, shapeDataForInput, axes) = shapeParams; std::ostringstream result; - result << LayerTestsDefinitions::InterpolateLayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); + result << "ShapeCalcMode=" << shapeCalcMode << "_"; + result << "IS="; + result << CommonTestUtils::partialShape2str({inputShapes.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShapes.second) { + result << CommonTestUtils::vec2str(shape) << "_"; + } + if (shapeCalcMode == ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES) { + result << "Scales="; + } else { + result << "Sizes="; + } + for (const auto &data : shapeDataForInput) { + result << CommonTestUtils::vec2str(data) << "_"; + } + result << shapeInputType << "_"; + result << "InterpolateMode=" << mode << "_"; + result << "CoordinateTransformMode=" << transfMode << "_"; + result << "NearestMode=" << nearMode << "_"; + result << "CubeCoef=" << cubeCoef << "_"; + result << "Antialias=" << antiAlias << "_"; + result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "Axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "PRC=" << prec << "_"; result << CPUTestsBase::getTestCaseName(cpuParams); result << CpuTestWithFusing::getTestCaseName(fusingParams); @@ -45,75 +105,168 @@ public: return result.str(); } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (i == 1) { + if (shapeCalcMode == ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES) { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], sizes[inferRequestNum].data()); + } else { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], scales[inferRequestNum].data()); + } + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + + void configure_model() override { + ov::preprocess::PrePostProcessor p(function); + { + auto& params = function->get_parameters(); + for (size_t i = 0; i < params.size(); i++) { + if (i > 0) { + continue; + } + if (inType != ov::element::Type_t::undefined) { + p.input(ov::preprocess::InputInfo(i) + .tensor(ov::preprocess::InputTensorInfo().set_element_type(inType))); + } + } + } + { + auto results = function->get_results(); + for (size_t i = 0; i < results.size(); i++) { + if (outType != ov::element::Type_t::undefined) { + p.output(ov::preprocess::OutputInfo(i) + .tensor(ov::preprocess::OutputTensorInfo().set_element_type(outType))); + } + } + } + function = p.build(); + } + protected: + std::vector> scales; + std::vector> sizes; + ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; + size_t inferRequestNum = 0; + void SetUp() override { - LayerTestsDefinitions::InterpolateLayerTestParams basicParamsSet; + targetDevice = CommonTestUtils::DEVICE_CPU; + + InterpolateSpecificParams specificParams; + ShapeParams shapeParams; + ElementType ngPrc; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + std::tie(specificParams, shapeParams, ngPrc, cpuParams, fusingParams, additionalConfig) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = fusingParams; - - LayerTestsDefinitions::InterpolateSpecificParams interpolateParams; - std::vector inputShape; - std::vector targetShape; - Precision netPrecision; - std::map additional_config; - std::tie(interpolateParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, - targetShape, targetDevice, additional_config) = basicParamsSet; + configuration.insert(additionalConfig.begin(), additionalConfig.end()); ngraph::op::v4::Interpolate::InterpolateMode mode; - ngraph::op::v4::Interpolate::ShapeCalcMode shapeCalcMode; - ngraph::op::v4::Interpolate::CoordinateTransformMode coordinateTransformMode; - ngraph::op::v4::Interpolate::NearestMode nearestMode; - bool antialias; - std::vector padBegin, padEnd; + ngraph::op::v4::Interpolate::CoordinateTransformMode transfMode; + ngraph::op::v4::Interpolate::NearestMode nearMode; + bool antiAlias; + std::vector padBegin; + std::vector padEnd; double cubeCoef; + std::tie(mode, transfMode, nearMode, antiAlias, padBegin, padEnd, cubeCoef) = specificParams; + + InputShape dataShape; + ngraph::helpers::InputLayerType shapeInputType; + std::vector> shapeDataForInput; std::vector axes; - std::vector scales; - std::tie(mode, shapeCalcMode, coordinateTransformMode, nearestMode, antialias, padBegin, padEnd, cubeCoef, axes, scales) = interpolateParams; - inPrc = outPrc = netPrecision; - configuration.insert(additionalConfig.begin(), additionalConfig.end()); - using ShapeCalcMode = ngraph::op::v4::Interpolate::ShapeCalcMode; + std::tie(shapeCalcMode, dataShape, shapeInputType, shapeDataForInput, axes) = shapeParams; - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + if (shapeCalcMode == ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES) { + scales = shapeDataForInput; + sizes.resize(scales.size(), std::vector(scales.front().size(), 0)); + } else { + sizes.resize(shapeDataForInput.size()); + for (size_t i = 0; i < shapeDataForInput.size(); i++) { + for (size_t j = 0; j < shapeDataForInput[i].size(); j++) { + sizes[i].push_back(shapeDataForInput[i][j]); + } + } + scales.resize(sizes.size(), std::vector(sizes.front().size(), 0)); + } - auto constant = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {targetShape.size()}, targetShape); + std::vector inputShapes; + inputShapes.push_back(dataShape); + if (shapeInputType == ngraph::helpers::InputLayerType::PARAMETER) { + inputShapes.push_back(InputShape({static_cast(axes.size())}, std::vector(dataShape.second.size(), {axes.size()}))); + } - auto scales_const = ngraph::opset3::Constant(ngraph::element::Type_t::f32, {scales.size()}, scales); + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + inType = outType = ngPrc = ElementType::bf16; + rel_threshold = 1e-2f; + } else { + inType = outType = ngPrc; + } - auto scalesInput = std::make_shared(scales_const); + init_input_shapes(inputShapes); - auto secondaryInput = std::make_shared(constant); + auto params = ngraph::builder::makeDynamicParams(ngPrc, {inputDynamicShapes.front()}); + + std::shared_ptr sizesInput, scalesInput; + if (shapeCalcMode == ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES) { + if (shapeInputType == ngraph::helpers::InputLayerType::PARAMETER) { + auto paramNode = std::make_shared(ngraph::element::Type_t::f32, ov::Shape{scales.front().size()}); + params.push_back(paramNode); + scalesInput = paramNode; + } else { + scalesInput = std::make_shared(ngraph::element::Type_t::f32, ov::Shape{scales.front().size()}, scales.front()); + } + sizesInput = std::make_shared(ngraph::element::Type_t::i32, ov::Shape{sizes.front().size()}, sizes.front()); + } else { + if (shapeInputType == ngraph::helpers::InputLayerType::PARAMETER) { + auto paramNode = std::make_shared(ngraph::element::Type_t::i32, ov::Shape{sizes.front().size()}); + params.push_back(paramNode); + sizesInput = paramNode; + } else { + sizesInput = std::make_shared(ngraph::element::Type_t::i32, ov::Shape{sizes.front().size()}, sizes.front()); + } + scalesInput = std::make_shared(ngraph::element::Type_t::f32, ov::Shape{scales.front().size()}, scales.front()); + } + auto axesInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{axes.size()}, axes); + + for (size_t i = 0; i < params.size(); i++) { + params[i]->set_friendly_name(std::string("param_") + std::to_string(i)); + } + + ngraph::op::v4::Interpolate::InterpolateAttrs interpAttr{mode, shapeCalcMode, padBegin, padEnd, transfMode, nearMode, + antiAlias, cubeCoef}; - auto axesConst = ngraph::opset3::Constant(ngraph::element::Type_t::i64, {axes.size()}, axes); - auto axesInput = std::make_shared(axesConst); - ngraph::op::v4::Interpolate::InterpolateAttrs interpolateAttributes{mode, shapeCalcMode, padBegin, - padEnd, coordinateTransformMode, nearestMode, antialias, cubeCoef}; auto interpolate = std::make_shared(params[0], - secondaryInput, + sizesInput, scalesInput, axesInput, - interpolateAttributes); - function = makeNgraphFunction(ngPrc, params, interpolate, "interpolate"); + interpAttr); + + function = makeNgraphFunction(ngPrc, params, interpolate, "InterpolateCPU"); + if (selectedType.empty()) { selectedType = getPrimitiveType(); } - selectedType.push_back('_'); - if (additionalConfig.count(PluginConfigParams::KEY_ENFORCE_BF16) && additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) - selectedType += "BF16"; - else - selectedType += netPrecision.name(); + selectedType = makeSelectedTypeStr(selectedType, ngPrc); } }; TEST_P(InterpolateLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() - Run(); + run(); CheckPluginRelatedResults(executableNetwork, "Interpolate"); } @@ -122,14 +275,14 @@ namespace { /* CPU PARAMS */ std::vector filterCPUInfoForDevice() { std::vector resCPUParams; - if (with_cpu_x86_avx512f()) { + if (InferenceEngine::with_cpu_x86_avx512f()) { resCPUParams.push_back(CPUSpecificParams{{nChw16c, x, x, x}, {nChw16c}, {"jit_avx512"}, "jit_avx512"}); resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx512"}, "jit_avx512"}); - } else if (with_cpu_x86_avx2()) { + } else if (InferenceEngine::with_cpu_x86_avx2()) { resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_avx2"}, "jit_avx2"}); resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_avx2"}, "jit_avx2"}); resCPUParams.push_back(CPUSpecificParams{{nchw, x, x, x}, {nchw}, {"jit_avx2"}, "jit_avx2"}); - } else if (with_cpu_x86_sse42()) { + } else if (InferenceEngine::with_cpu_x86_sse42()) { resCPUParams.push_back(CPUSpecificParams{{nChw8c, x, x, x}, {nChw8c}, {"jit_sse42"}, "jit_sse42"}); resCPUParams.push_back(CPUSpecificParams{{nhwc, x, x, x}, {nhwc}, {"jit_sse42"}, "jit_sse42"}); } else { @@ -138,11 +291,6 @@ std::vector filterCPUInfoForDevice() { return resCPUParams; } /* ========== */ - -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 -}; - const std::vector coordinateTransformModes = { ngraph::op::v4::Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN, ngraph::op::v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL, @@ -168,11 +316,6 @@ const std::vector defNearestModes = { ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_FLOOR, }; -const std::vector> pads = { - {0, 0, 0, 0}, - {0, 0, 1, 1}, -}; - const std::vector antialias = { false, }; @@ -181,152 +324,178 @@ const std::vector cubeCoefs = { -0.75f, }; -const std::vector> defaultAxes = { - {0, 1, 2, 3} -}; - -const std::vector> defaultScales = { - {1.f, 1.f, 1.25f, 1.5f} -}; - -const auto interpolateCasesNN = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), - ::testing::ValuesIn(shapeCalculationMode), - ::testing::ValuesIn(coordinateTransformModes), - ::testing::ValuesIn(nearestModes), - ::testing::ValuesIn(antialias), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes), - ::testing::ValuesIn(defaultScales)); - -const auto interpolateCasesLinearOnnx = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), - ::testing::ValuesIn(shapeCalculationMode), - ::testing::ValuesIn(coordinateTransformModes), - ::testing::ValuesIn(defNearestModes), - ::testing::ValuesIn(antialias), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes), - ::testing::ValuesIn(defaultScales)); - -const auto interpolateCasesLinear = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear), - ::testing::ValuesIn(shapeCalculationMode), - ::testing::ValuesIn(coordinateTransformModes), - ::testing::ValuesIn(defNearestModes), - ::testing::ValuesIn(antialias), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes), - ::testing::ValuesIn(defaultScales)); - -const auto interpolateCasesCubic = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::cubic), - ::testing::ValuesIn(shapeCalculationMode), - ::testing::ValuesIn(coordinateTransformModes), - ::testing::ValuesIn(defNearestModes), - ::testing::ValuesIn(antialias), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(pads), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes), - ::testing::ValuesIn(defaultScales)); - const std::vector interpolateFusingParamsSet{ emptyFusingSpec, - fusingRelu, fusingSwish, - fusingFakeQuantizePerChannelRelu, + fusingFakeQuantizePerTensorRelu, }; -std::map additional_config = {}; - std::vector> filterAdditionalConfig() { - if (with_cpu_x86_avx512f()) { + if (InferenceEngine::with_cpu_x86_avx512f()) { return { - {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, - {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}} + {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}} }; } else { return { // default config as an stub for target without avx512, otherwise all tests with BF16 in its name are skipped - {{PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::NO}} + {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::NO}} }; } } +const std::vector> pads4D = { + {0, 0, 0, 0}, + {0, 0, 1, 1}, +}; + +const std::vector> defaultAxes4D = { + {0, 1, 2, 3} +}; + +const std::vector shapeParams4D = { + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{}, {{1, 11, 4, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f, 1.5f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{}, {{1, 11, 4, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 11, 5, 6}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f, 1.5f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 11, 5, 6}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1, 11, 6, 7}, {2, 7, 8, 7}}, + defaultAxes4D.front() + } +}; + +const auto interpolateCasesNN = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(nearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(cubeCoefs)); + INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesNN, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesNN, + ::testing::ValuesIn(shapeParams4D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +const std::vector shapeParams4D_fixed_C = { + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{}, {{1, 11, 4, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f, 1.5f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, 16, -1, -1}, {{1, 16, 4, 4}, {1, 16, 6, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 16, 6, 7}}, + defaultAxes4D.front() + } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN_Layout_PerChannelFuse_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCasesNN, + ::testing::ValuesIn(shapeParams4D_fixed_C), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice()), + ::testing::Values(fusingFakeQuantizePerChannelRelu), + ::testing::ValuesIn(filterAdditionalConfig())), + InterpolateLayerCPUTest::getTestCaseName); + +const auto interpolateCasesLinearOnnx = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(cubeCoefs)); + INSTANTIATE_TEST_SUITE_P(smoke_InterpolateLinearOnnx_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesLinearOnnx, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesLinearOnnx, + ::testing::ValuesIn(shapeParams4D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +const auto interpolateCasesLinear = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(cubeCoefs)); + INSTANTIATE_TEST_SUITE_P(smoke_InterpolateLinear_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesLinear, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesLinear, + ::testing::ValuesIn(shapeParams4D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +const auto interpolateCasesCubic = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::cubic), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(pads4D), + ::testing::ValuesIn(cubeCoefs)); + INSTANTIATE_TEST_SUITE_P(smoke_InterpolateCubic_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesCubic, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesCubic, + ::testing::ValuesIn(shapeParams4D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), @@ -335,14 +504,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_InterpolateCubic_Layout_Test, InterpolateLayerCPU ////////////////////////5D///////////////////////////// std::vector filterCPUInfoForDevice5D() { std::vector resCPUParams; - if (with_cpu_x86_avx512f()) { + if (InferenceEngine::with_cpu_x86_avx512f()) { resCPUParams.push_back(CPUSpecificParams{{nCdhw16c, x, x, x}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"}); resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x, x}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}); - } else if (with_cpu_x86_avx2()) { + } else if (InferenceEngine::with_cpu_x86_avx2()) { resCPUParams.push_back(CPUSpecificParams{{nCdhw8c, x, x, x}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"}); resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x, x}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}); resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x, x}, {ncdhw}, {"jit_avx2"}, "jit_avx2"}); - } else if (with_cpu_x86_sse42()) { + } else if (InferenceEngine::with_cpu_x86_sse42()) { resCPUParams.push_back(CPUSpecificParams{{nCdhw8c, x, x, x}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"}); resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x, x}, {ndhwc}, {"jit_sse42"}, "jit_sse42"}); } else { @@ -359,70 +528,126 @@ const std::vector> defaultAxes5D = { {0, 1, 2, 3, 4} }; -const std::vector> defaultScales5D = { - {1.f, 1.f, 1.25f, 1.5f, 0.5f} +const std::vector shapeParams5D = { + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{}, {{1, 11, 4, 4, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f, 1.5f, 0.5f}}, + defaultAxes5D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{}, {{1, 11, 4, 4, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 11, 5, 6, 2}}, + defaultAxes5D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1.f, 1.f, 1.25f, 1.5f, 0.5f}}, + defaultAxes5D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{1, 11, 5, 6, 4}}, + defaultAxes5D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}}, + defaultAxes5D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}}, + defaultAxes5D.front() + }, }; const auto interpolateCasesLinearOnnx5D = ::testing::Combine( ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), - ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), ::testing::ValuesIn(nearestModes), ::testing::ValuesIn(antialias), ::testing::ValuesIn(pads5D), ::testing::ValuesIn(pads5D), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes5D), - ::testing::ValuesIn(defaultScales5D)); - -const auto interpolateCasesNN5D = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), - ::testing::ValuesIn(shapeCalculationMode), - ::testing::ValuesIn(coordinateTransformModes), - ::testing::ValuesIn(defNearestModes), - ::testing::ValuesIn(antialias), - ::testing::ValuesIn(pads5D), - ::testing::ValuesIn(pads5D), - ::testing::ValuesIn(cubeCoefs), - ::testing::ValuesIn(defaultAxes5D), - ::testing::ValuesIn(defaultScales5D)); + ::testing::ValuesIn(cubeCoefs)); INSTANTIATE_TEST_SUITE_P(smoke_InterpolateLinearOnnx5D_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesLinearOnnx5D, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6, 2})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesLinearOnnx5D, + ::testing::ValuesIn(shapeParams5D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice5D()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +const auto interpolateCasesNN5D = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), + ::testing::ValuesIn(coordinateTransformModes), + ::testing::ValuesIn(defNearestModes), + ::testing::ValuesIn(antialias), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(pads5D), + ::testing::ValuesIn(cubeCoefs)); + INSTANTIATE_TEST_SUITE_P(smoke_InterpolateNN5D_Layout_Test, InterpolateLayerCPUTest, ::testing::Combine( - ::testing::Combine( - interpolateCasesNN5D, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 21, 4, 4, 4})), - ::testing::Values(std::vector({1, 21, 5, 6, 2})), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), + interpolateCasesNN5D, + ::testing::ValuesIn(shapeParams5D), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(filterCPUInfoForDevice5D()), ::testing::ValuesIn(interpolateFusingParamsSet), ::testing::ValuesIn(filterAdditionalConfig())), InterpolateLayerCPUTest::getTestCaseName); +// corner cases +const std::vector shapeParams4D_corner = { + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES, + InputShape{{1, 11, 4, 4}, {{1, 11, 4, 4}, {1, 11, 4, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}}, + defaultAxes4D.front() + }, + ShapeParams{ + ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES, + InputShape{{1, 11, 4, 4}, {{1, 11, 4, 4}, {1, 11, 4, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{1, 11, 6, 7}, {1, 11, 8, 7}}, + defaultAxes4D.front() + } +}; + +const auto interpolateCornerCases = ::testing::Combine( + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), + ::testing::Values(ngraph::op::v4::Interpolate::CoordinateTransformMode::ASYMMETRIC), + ::testing::Values(ngraph::op::v4::Interpolate::NearestMode::SIMPLE), + ::testing::ValuesIn(antialias), + ::testing::Values(std::vector{0, 0, 0, 0}), + ::testing::Values(std::vector{0, 0, 0, 0}), + ::testing::ValuesIn(cubeCoefs)); + +INSTANTIATE_TEST_SUITE_P(smoke_Interpolate_corner_Layout_Test, InterpolateLayerCPUTest, + ::testing::Combine( + interpolateCornerCases, + ::testing::ValuesIn(shapeParams4D_corner), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(filterCPUInfoForDevice()), + ::testing::ValuesIn(interpolateFusingParamsSet), + ::testing::ValuesIn(filterAdditionalConfig())), + InterpolateLayerCPUTest::getTestCaseName); + } // namespace } // namespace CPULayerTestsDefinitions