From ac45196ce2cada0a4b5294bc05f4b3d7b102200c Mon Sep 17 00:00:00 2001
From: Chenhu Wang
Date: Thu, 13 Jan 2022 15:01:02 +0800
Subject: [PATCH] [CPU] Interpolate runtime params cache (#9524)

---
 .../src/nodes/mkldnn_eltwise_node.cpp         |   1 +
 .../src/nodes/mkldnn_interpolate_node.cpp     | 207 ++++++++++++++----
 .../src/nodes/mkldnn_interpolate_node.h       |  35 ++-
 .../cpu/single_layer_tests/interpolate.cpp    |  24 +-
 4 files changed, 203 insertions(+), 64 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp b/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp
index b546a32f741..a5fc77dd4cf 100644
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp
@@ -1934,6 +1934,7 @@ void MKLDNNEltwiseNode::prepareParams() {
     // together with the corresponding appendPostOps method to pass the scales and shifts pointers at runtime.
     // Until then we have to read them from the quantization_t directly, store them somewhere
     // and nullify them to get rid of the address dependency in the key structure
+    fqDataPtrs.clear();
     for (int i = 0; i < key.postOps.len(); ++i) {
         auto &data = key.postOps.get()->entry_[i].quantization.data;
         fqDataPtrs.insert(fqDataPtrs.end(), std::begin(data), std::end(data));
diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp
index 704ece9719d..b783b0d6584 100644
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp
@@ -80,8 +80,10 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
 
         this->preamble();
 
-        if (attr_.post_ops_.len() != 0)
+        if (attr_.post_ops_.len() != 0) {
+            mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]);
             mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]);
+        }
 
         if (isa == cpu::x64::avx512_common)
             uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
@@ -183,7 +185,8 @@ private:
     Reg64 reg_tmp_64 = r10;
 
     Xbyak::Reg64 reg_oc_off = rax;
-    Xbyak::Reg64 reg_d_weights = rbx;
+    Xbyak::Reg64 reg_post_ops_data = rbx;
+    Xbyak::Reg64 reg_d_weights = reg_tmp_64;
     Xbyak::Reg64 reg_d_bias = rcx;
     Xbyak::Reg32 reg_index_offset = edx;
 
@@ -1569,16 +1572,20 @@
         int eltwise_inj_idx = 0;
         int depthwise_inj_idx = 0;
         int quantization_inj_idx = 0;
+        int post_ops_data_offset = 0;
         for (int i = 0; i < p.len(); i++) {
             auto& post_op = p.entry_[i];
             if (post_op.is_eltwise()) {
                 eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1);
                 eltwise_inj_idx++;
             } else if (post_op.is_depthwise()) {
-                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
-                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+                mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);
                 add(reg_d_weights, reg_oc_off);
+                post_ops_data_offset += sizeof(float*);
+                mov(reg_d_bias, ptr[reg_post_ops_data + post_ops_data_offset]);
                 add(reg_d_bias, reg_oc_off);
+                post_ops_data_offset += sizeof(float*);
+
                 // weight and bias is padded. scalar as vector.
                 depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast);
                 depthwise_inj_idx++;
@@ -1588,23 +1595,102 @@ private:
                 int s_idx = vmm_val.getIdx();
 
-                quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off);
+                quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                 quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);
 
-                quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off);
+                quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                 quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);
 
                 if (do_dequantization) {
-                    quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off);
+                    quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                     quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
                 }
 
+                post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
                 quantization_inj_idx++;
             }
         }
     }
 };
 
+namespace {
+struct InterpolateKey {
+    MKLDNNInterpolateNode::InterpolateAttrs nodeAttrs;
+    VectorDims srcDims;
+    VectorDims dstDims;
+    std::vector<float> dataScales;
+    mkldnn::primitive_attr attr;
+
+    size_t hash() const;
+    bool operator==(const InterpolateKey& rhs) const;
+};
+
+size_t InterpolateKey::hash() const {
+    using namespace dnnl::impl;
+    using namespace dnnl::impl::primitive_hashing;
+
+    size_t seed = 0;
+
+    seed = hash_combine(seed, nodeAttrs.mode);
+    seed = hash_combine(seed, nodeAttrs.coordTransMode);
+    seed = hash_combine(seed, nodeAttrs.nearestMode);
+    seed = hash_combine(seed, nodeAttrs.layout);
+
+    seed = hash_combine(seed, nodeAttrs.antialias);
+    seed = hash_combine(seed, nodeAttrs.cubeCoeff);
+
+    seed = get_vector_hash(seed, nodeAttrs.padBegin);
+    seed = get_vector_hash(seed, nodeAttrs.padEnd);
+
+    seed = hash_combine(seed, nodeAttrs.inPrc.getPrecVal());
+    seed = hash_combine(seed, nodeAttrs.outPrc.getPrecVal());
+
+    seed = get_vector_hash(seed, srcDims);
+    seed = get_vector_hash(seed, dstDims);
+    seed = get_vector_hash(seed, dataScales);
+
+    seed = hash_combine(seed, get_attr_hash(*attr.get()));
+    return seed;
+}
+
+bool InterpolateKey::operator==(const InterpolateKey &rhs) const {
+    if (nodeAttrs.mode != rhs.nodeAttrs.mode)
+        return false;
+    if (nodeAttrs.coordTransMode != rhs.nodeAttrs.coordTransMode)
+        return false;
+    if (nodeAttrs.nearestMode != rhs.nodeAttrs.nearestMode)
+        return false;
+    if (nodeAttrs.layout != rhs.nodeAttrs.layout)
+        return false;
+    if (nodeAttrs.antialias != rhs.nodeAttrs.antialias)
+        return false;
+    if (nodeAttrs.cubeCoeff != rhs.nodeAttrs.cubeCoeff)
+        return false;
+    if (nodeAttrs.padBegin != rhs.nodeAttrs.padBegin)
+        return false;
+    if (nodeAttrs.padEnd != rhs.nodeAttrs.padEnd)
+        return false;
+    if (nodeAttrs.inPrc != rhs.nodeAttrs.inPrc)
+        return false;
+    if (nodeAttrs.outPrc != rhs.nodeAttrs.outPrc)
+        return false;
+    if (nodeAttrs.layout != rhs.nodeAttrs.layout)
+        return false;
+
+    if (srcDims != rhs.srcDims)
+        return false;
+    if (dstDims != rhs.dstDims)
+        return false;
+    if (dataScales != rhs.dataScales)
+        return false;
+    if (!(*attr.get() == *rhs.attr.get()))
+        return false;
+
+    return true;
+}
+
+} // namespace
+
 // shapeND: n     c    d    h    w
 // blockND: ncdhw cdhw dhw  hw   w    1
 // index  : 0     1    2    3    4    5
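
Note: the InterpolateKey above pairs a hash() with a full operator== so the executor cache can hash for fast lookup while using exact comparison to reject collisions. The standalone sketch below illustrates that same pattern with a plain std::unordered_map; SimpleKey, KeyHasher, Executor and the getOrCreate helper are illustrative names only and do not reproduce the plugin's cache API.

#include <cstddef>
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>

// Hypothetical, simplified stand-in for InterpolateKey: just enough state to
// distinguish two dynamic-shape inferences of the same node.
struct SimpleKey {
    int mode;
    std::vector<size_t> srcDims;
    std::vector<size_t> dstDims;

    size_t hash() const {
        size_t seed = std::hash<int>()(mode);
        auto combine = [&seed](size_t v) { seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2); };
        for (size_t d : srcDims) combine(std::hash<size_t>()(d));
        for (size_t d : dstDims) combine(std::hash<size_t>()(d));
        return seed;
    }
    bool operator==(const SimpleKey& rhs) const {
        return mode == rhs.mode && srcDims == rhs.srcDims && dstDims == rhs.dstDims;
    }
};

struct KeyHasher {
    size_t operator()(const SimpleKey& k) const { return k.hash(); }
};

struct Executor {};  // stand-in for a prepared interpolate executor

using ExecutorCache = std::unordered_map<SimpleKey, std::shared_ptr<Executor>, KeyHasher>;

// getOrCreate-style lookup: the (potentially expensive) build happens only on a miss.
std::shared_ptr<Executor> getOrCreate(ExecutorCache& cache, const SimpleKey& key) {
    auto it = cache.find(key);
    if (it != cache.end())
        return it->second;                        // hit: reuse the cached executor
    auto exec = std::make_shared<Executor>();     // miss: build and remember it
    cache.emplace(key, exec);
    return exec;
}

int main() {
    ExecutorCache cache;
    SimpleKey a{0, {1, 11, 4, 4}, {1, 11, 5, 6}};
    SimpleKey b{0, {2, 7, 6, 5}, {2, 7, 8, 7}};
    auto e1 = getOrCreate(cache, a);   // builds
    auto e2 = getOrCreate(cache, b);   // builds a second executor
    auto e3 = getOrCreate(cache, a);   // same key as the first call: cache hit
    return (e1 == e3 && e1 != e2) ? 0 : 1;
}
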
@@ -2027,31 +2113,63 @@ void MKLDNNInterpolateNode::prepareParams() {
         IE_THROW() << errorPrefix << " did not allocate target shape memory";
     if (!scaleMemPtr || !scaleMemPtr->GetPrimitivePtr())
         IE_THROW() << errorPrefix << " did not allocate scales memory";
-    if (getSelectedPrimitiveDescriptor() == nullptr)
+    const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
+    if (selected_pd == nullptr)
         IE_THROW() << errorPrefix << " did not set preferable primitive descriptor";
 
     const auto &srcDims = srcMemPtr->getStaticDims();
     const auto &dstDims = dstMemPtr->getStaticDims();
 
-    setPostOps(attr, dstDims, true);
     std::vector<float> dataScales = getScales(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd), dstDims);
     if (getOutputShapeAtPort(0).getRank() > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) {
         IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)";
     }
-    if ((interpAttrs.mode == InterpolateMode::nearest || interpAttrs.mode == InterpolateMode::linear_onnx || interpAttrs.mode == InterpolateMode::cubic) &&
-        ((interpAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
-            (mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == Precision::FP32))) {
-        execPtr = std::make_shared<InterpolateJitExecutor>(interpAttrs,
-                                                           srcDims,
-                                                           dstDims,
-                                                           dataScales,
-                                                           attr);
-    } else {
-        execPtr = std::make_shared<InterpolateRefExecutor>(interpAttrs,
-                                                           srcDims,
-                                                           dstDims,
-                                                           dataScales);
+
+    InterpolateKey key = {interpAttrs, srcDims, dstDims, dataScales, mkldnn::primitive_attr()};
+    setPostOps(key.attr, dstDims, true);
+
+    postOpsDataPtrs.clear();
+    auto &postOps = (*key.attr.get()).post_ops_;
+    for (int i = 0; i < postOps.len(); ++i) {
+        auto &postOp = postOps.entry_[i];
+        if (postOp.is_quantization()) {
+            auto &data = postOp.quantization.data;
+            postOpsDataPtrs.insert(postOpsDataPtrs.end(), std::begin(data), std::end(data));
+            memset(data, 0, sizeof(data));
+        } else if (postOp.is_depthwise()) {
+            auto &weights = postOp.depthwise.weights_data;
+            auto &biases = postOp.depthwise.biases_data;
+            postOpsDataPtrs.push_back(weights);
+            postOpsDataPtrs.push_back(biases);
+            weights = 0;
+            biases = 0;
+        }
     }
+
+    auto buildExecutor = [&](const InterpolateKey& key) -> std::shared_ptr<InterpolateExecutor> {
+        std::shared_ptr<InterpolateExecutor> executor;
+        if ((key.nodeAttrs.mode == InterpolateMode::nearest || key.nodeAttrs.mode == InterpolateMode::linear_onnx ||
+                key.nodeAttrs.mode == InterpolateMode::cubic) &&
+            ((key.nodeAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
+                (mayiuse(cpu::x64::avx2) && key.nodeAttrs.inPrc == Precision::FP32))) {
+            executor = std::make_shared<InterpolateJitExecutor>(key.nodeAttrs,
+                                                                key.srcDims,
+                                                                key.dstDims,
+                                                                key.dataScales,
+                                                                key.attr);
+        } else {
+            executor = std::make_shared<InterpolateRefExecutor>(key.nodeAttrs,
+                                                                key.srcDims,
+                                                                key.dstDims,
+                                                                key.dataScales);
+        }
+        return executor;
+    };
+
+    auto cache = getRuntimeCache();
+    auto result = cache->getOrCreate(key, buildExecutor);
+    execPtr = result.first;
+
     lastOutputDims = dstDims;
 }
 
@@ -2223,13 +2341,13 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
         src_data = src_data_origin;
     }
 
-    execPtr->exec(src_data, dst_data);
+    execPtr->exec(src_data, dst_data, static_cast<const void*>(&postOpsDataPtrs[0]));
 }
 
 // for ndhwc and nCdhw8c[16c]
 // input may be f32/bf16/int8, fused->output varies
-void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
-                                                                int ID, int IH, int IW, int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
     int *index_d = static_cast<int*>(&indexTable[0]);
     int *index_h = static_cast<int*>(&indexTable[OD]);
     int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@@ -2254,6 +2372,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
                 arg.index = static_cast<int*>(&(index_w_kernel[0]));
                 arg.work_amount = C;
                 arg.oc_off = 0;
+                arg.post_op_data = post_ops_data_;
                 (*interpolateKernel)(&arg);
             });
         } else {  // for blk
@@ -2275,6 +2394,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
                     arg.index = static_cast<int*>(&(index_w_kernel[0]));
                     arg.work_amount = static_cast<size_t>(OW);
                     arg.oc_off = cb * blk_size * sizeof(float);
+                    arg.post_op_data = post_ops_data_;
                     (*interpolateKernel)(&arg);
                 }
             });
@@ -2282,8 +2402,8 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
     }  // batch end
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
-                                                             int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                             int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
     int *index_d = static_cast<int*>(&indexTable[0]);
     int *index_h = static_cast<int*>(&indexTable[OD]);
     int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@@ -2308,11 +2428,12 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_p
         arg.index = static_cast<int*>(&index_kernel[0]);  // need index_h and index_w in kernel, it's in continuous memory so one param
         arg.oc_off = static_cast<size_t>(c * sizeof(float));
         // work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp.
+        arg.post_op_data = post_ops_data_;
         (*interpolateKernel)(&arg);
     });
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
+void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C,
                                                                      int ID, int IH, int IW, int OD, int OH, int OW) {
     // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7
     // weight: Left:0, right:1, top:2, bottom:3, front:4, end:5
@@ -2331,12 +2452,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8
         arg.dst = out_ptr_nc;
         arg.work_amount = OW * OH * OD;
         arg.oc_off = static_cast<size_t>(c * sizeof(float));
+        arg.post_op_data = post_ops_data_;
         (*interpolateKernel)(&arg);
     });
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
-                                                                        int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                        int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
     // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD
     std::vector<int*> indexPtr(MAX_INPUT_INTERPOLATE, 0);
     std::vector<float*> weightPtr(MAX_INPUT_INTERPOLATE, 0);
@@ -2401,12 +2523,14 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const ui
                     arg.dst = out_ptr_ndhw;
                     arg.work_amount = workAmount;
                     arg.oc_off = 0;
+                    arg.post_op_data = post_ops_data_;
                     (*interpolateKernel)(&arg);
                 }
     });
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                   int B, int C, int IH, int IW, int OH, int OW) {
     const int idxNum = 1;
     int *xOrigin = static_cast<int*>(&indexTable[0]);
     float *xFactor = reinterpret_cast<float*>(&indexTable[OW]);
@@ -2447,11 +2571,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t
         // for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB
         arg.work_amount = workAmount;
         arg.oc_off = 0;
+        arg.post_op_data = post_ops_data_;
         (*interpolateKernel)(&arg);
     });
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                int B, int C, int IH, int IW, int OH, int OW) {
     int tblAdvance = 0;
     int *xOrigin = static_cast<int*>(&indexTable[tblAdvance]);
     tblAdvance += OW;
@@ -2481,6 +2607,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *i
         arg.weight_ptr[1] = yFactor;
         arg.work_amount = static_cast<size_t>(OW * OH);
         arg.oc_off = static_cast<size_t>(c * sizeof(float));
+        arg.post_op_data = post_ops_data_;
         (*interpolateKernel)(&arg);
     });
 }
@@ -3284,7 +3411,7 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte
     }
 }
 
-void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
     size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
     size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
 
@@ -3294,25 +3421,25 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
     switch (mode) {
         case InterpolateMode::nearest: {
             if (configured_for_layout == InterpolateLayoutType::planar) {
-                NNPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                NNPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
             } else {
-                NNCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                NNCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
             }
             break;
         }
         case InterpolateMode::linear_onnx: {
             if (configured_for_layout == InterpolateLayoutType::planar) {
-                linearOnnxPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                linearOnnxPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
            } else {
-                linearOnnxCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                linearOnnxCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
             }
             break;
         }
         case InterpolateMode::cubic: {
             if (configured_for_layout == InterpolateLayoutType::planar) {
-                cubicPlanar(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
+                cubicPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
             } else {
-                cubicCGathered(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
+                cubicCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
             }
             break;
         }
@@ -3322,7 +3449,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
     }
 }
 
-void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
+void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
     size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
     size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
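
Note: the prepareParams() change earlier in this file relies on the post-ops attribute inside the cache key carrying no raw data addresses, while the JIT kernel still needs those addresses at execution time. The pointers are therefore pulled out into the flat postOpsDataPtrs vector (two entries per depthwise post-op, the data table per quantization post-op) and handed to exec() separately. The sketch below shows the flattening idea with simplified, hypothetical entry types; it is not the oneDNN post_ops_t layout.

#include <cstring>
#include <iterator>
#include <vector>

// Simplified, illustrative stand-ins for post-op entries that own runtime data.
struct DepthwiseEntry    { const float* weights; const float* biases; };
struct QuantizationEntry { const void* data[6]; };   // e.g. crop/scale/shift tables

// Collect every runtime pointer into one flat list and null it out in the
// entries themselves, so a key built from the entries has no address dependency.
std::vector<const void*> flattenPostOpsData(std::vector<DepthwiseEntry>& depthwise,
                                            std::vector<QuantizationEntry>& quantization) {
    std::vector<const void*> flat;
    for (auto& e : depthwise) {
        flat.push_back(e.weights);
        flat.push_back(e.biases);
        e.weights = nullptr;
        e.biases = nullptr;
    }
    for (auto& e : quantization) {
        flat.insert(flat.end(), std::begin(e.data), std::end(e.data));
        std::memset(e.data, 0, sizeof(e.data));
    }
    return flat;   // handed to the kernel as jit_interpolate_call_args::post_op_data
}

At execution time the generated kernel walks this array with a running byte offset (post_ops_data_offset in the generator above), so the collection order must match the order in which the post-ops are emitted.
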
diff --git a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h
index 5c423711c7f..828e2f4a398 100644
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h
@@ -68,6 +68,8 @@ struct jit_interpolate_call_args {
     void *dst;
     size_t work_amount;
     size_t oc_off;
+    // ptr to array of post op inputs pointers (flat list)
+    const void* post_op_data;
 };
 
 struct jit_uni_interpolate_kernel {
@@ -110,7 +112,6 @@ public:
     bool needPrepareParams() const override;
     void prepareParams() override;
 
-private:
     struct InterpolateAttrs {
         InterpolateMode mode;
         InterpolateCoordTransMode coordTransMode;
@@ -122,7 +123,10 @@ private:
         InferenceEngine::Precision inPrc;
         InferenceEngine::Precision outPrc;
         InterpolateLayoutType layout;
-    } interpAttrs;
+    };
+
+private:
+    InterpolateAttrs interpAttrs;
 
     class InterpolateExecutor {
     public:
@@ -131,7 +135,7 @@ private:
                             const VectorDims &dstDims,
                             const std::vector<float> &dataScales);
 
-        virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0;
+        virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) = 0;
         virtual ~InterpolateExecutor() = default;
         VectorDims getSrcDimPad5d() const { return srcDimPad5d; }
@@ -171,20 +175,26 @@ private:
                                const std::vector<float> &dataScales,
                                const mkldnn::primitive_attr &attr);
 
-        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
+        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
 
    private:
        // nearest neighbor
-        void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
-        void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+        void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                      int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+        void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                         int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
         // onnx linear
-        void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
-        void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+        void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                              int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+        void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                 int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
         // cubic
-        void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
-        void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
+        void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                         int B, int C, int IH, int IW, int OH, int OW);
+        void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                            int B, int C, int IH, int IW, int OH, int OW);
 
     private:
         std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
@@ -198,7 +208,7 @@ private:
                                const std::vector<float> &_dataScales) : dataScales(_dataScales), antialias(interpAttrs.antialias),
             InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales) {}
 
-        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
+        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
 
    private:
        void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
@@ -234,7 +244,8 @@ private:
     bool isAxesSpecified = false;
     std::vector<int> axes;
 
-    mkldnn::primitive_attr attr;
+    // 6 ptrs for each quantization, 2 ptrs for each depth_wise
+    std::vector<const void*> postOpsDataPtrs;
 
     std::vector<float> lastScales;
     std::vector<int32_t> lastSizes;
diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
index c7681435f29..7cc1401b58c 100644
--- a/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
+++ b/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
@@ -374,16 +374,16 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
         ngraph::helpers::InputLayerType::PARAMETER,
-        {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}},
+        {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f}},
         defaultAxes4D.front()
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
         ngraph::helpers::InputLayerType::PARAMETER,
-        {{1, 11, 6, 7}, {2, 7, 8, 7}},
+        {{1, 11, 6, 7}, {2, 7, 8, 7}, {1, 11, 6, 7}},
         defaultAxes4D.front()
     }
 };
@@ -391,14 +391,14 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
 const std::vector<ShapeParams> shapeParams4D_Full = {
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
         ngraph::helpers::InputLayerType::CONSTANT,
         {{1.f, 1.f, 1.25f, 1.5f}},
         defaultAxes4D.front()
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}, {1, 11, 4, 4}}},
         ngraph::helpers::InputLayerType::CONSTANT,
         {{1, 11, 5, 6}},
         defaultAxes4D.front()
@@ -638,16 +638,16 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
         ngraph::helpers::InputLayerType::PARAMETER,
-        {{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}},
+        {{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f, 0.5f}},
         defaultAxes5D.front()
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
         ngraph::helpers::InputLayerType::PARAMETER,
-        {{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}},
+        {{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}, {1, 11, 6, 7, 2}},
         defaultAxes5D.front()
     },
 };
@@ -655,14 +655,14 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
 const std::vector<ShapeParams> shapeParams5D_Full = {
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
         ngraph::helpers::InputLayerType::CONSTANT,
         {{1.f, 1.f, 1.25f, 1.5f, 0.5f}},
         defaultAxes5D.front()
     },
     ShapeParams{
         ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}, {1, 11, 4, 4, 4}}},
         ngraph::helpers::InputLayerType::CONSTANT,
         {{1, 11, 5, 6, 4}},
         defaultAxes5D.front()
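
Note: each dynamic-shape test case above gains a third target shape equal to the first one, so a single compiled model infers shape A, then B, then A again; the last inference is expected to reuse the executor cached by prepareParams() rather than build a new one. A minimal illustration of the pattern, with illustrative values only:

#include <cstddef>
#include <vector>

// Repeating the first target shape exercises the cache-hit path:
// the second {1, 11, 4, 4} inference should reuse the executor built for the first.
const std::vector<std::vector<size_t>> targetShapes = {
    {1, 11, 4, 4},   // builds and caches an executor
    {2, 7, 6, 5},    // different shape: a second executor is built
    {1, 11, 4, 4},   // same as the first: the cached executor is reused
};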