[CPU] Interpolate runtime params cache (#9524)

2022-01-13 15:01:02 +08:00 · 2022-01-13 15:01:02 +08:00 · ac45196ce2
commit ac45196ce2
parent f562e5572f
4 changed files with 203 additions and 64 deletions
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_eltwise_node.cpp
@ -1934,6 +1934,7 @@ void MKLDNNEltwiseNode::prepareParams() {
    // together with the corresponding appendPostOps method to pass the scales and shifts pointers at runtime.
    // Until then we have to read them from the quantization_t directly, store them somewhere
    // and nullify them to get read of the address dependency in the key structure
+    fqDataPtrs.clear();
    for (int i = 0; i < key.postOps.len(); ++i) {
        auto &data = key.postOps.get()->entry_[i].quantization.data;
        fqDataPtrs.insert(fqDataPtrs.end(), std::begin(data), std::end(data));
--- a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.cpp
@ -80,8 +80,10 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi

        this->preamble();

-        if (attr_.post_ops_.len() != 0)
+        if (attr_.post_ops_.len() != 0) {
+            mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]);
            mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]);
+        }
        if (isa == cpu::x64::avx512_common)
            uni_vpxor(vmm_zero, vmm_zero, vmm_zero);

@ -183,7 +185,8 @@ private:
    Reg64 reg_tmp_64 = r10;

    Xbyak::Reg64 reg_oc_off = rax;
-    Xbyak::Reg64 reg_d_weights = rbx;
+    Xbyak::Reg64 reg_post_ops_data = rbx;
+    Xbyak::Reg64 reg_d_weights = reg_tmp_64;
    Xbyak::Reg64 reg_d_bias = rcx;
    Xbyak::Reg32 reg_index_offset = edx;

@ -1569,16 +1572,20 @@ private:
        int eltwise_inj_idx = 0;
        int depthwise_inj_idx = 0;
        int quantization_inj_idx = 0;
+        int post_ops_data_offset = 0;
        for (int i = 0; i < p.len(); i++) {
            auto& post_op = p.entry_[i];
            if (post_op.is_eltwise()) {
                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1);
                eltwise_inj_idx++;
            } else if (post_op.is_depthwise()) {
-                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
-                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+                mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);
                add(reg_d_weights, reg_oc_off);
+                post_ops_data_offset += sizeof(float*);
+                mov(reg_d_bias, ptr[reg_post_ops_data + post_ops_data_offset]);
                add(reg_d_bias, reg_oc_off);
+                post_ops_data_offset += sizeof(float*);
+
                // weight and bias is padded. scalar as vector.
                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast);
                depthwise_inj_idx++;
@ -1588,23 +1595,102 @@ private:

                int s_idx = vmm_val.getIdx();

-                quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off);
+                quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);

-                quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off);
+                quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);

                if (do_dequantization) {
-                    quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off);
+                    quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
                    quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
                }

+                post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
                quantization_inj_idx++;
            }
        }
    }
 };

+namespace {
+// Key for the node runtime cache (see prepareParams): identifies an interpolate executor by its build inputs.
+struct InterpolateKey {
+    MKLDNNInterpolateNode::InterpolateAttrs nodeAttrs;
+    VectorDims srcDims;
+    VectorDims dstDims;
+    std::vector<float> dataScales;
+    mkldnn::primitive_attr attr;
+
+    size_t hash() const;
+    bool operator==(const InterpolateKey& rhs) const;
+};
+
+// Folds every field that operator== compares into one seed; keep the two in sync when adding fields.
+size_t InterpolateKey::hash() const {
+    using namespace dnnl::impl;
+    using namespace dnnl::impl::primitive_hashing;
+
+    size_t seed = 0;
+
+    seed = hash_combine(seed, nodeAttrs.mode);
+    seed = hash_combine(seed, nodeAttrs.coordTransMode);
+    seed = hash_combine(seed, nodeAttrs.nearestMode);
+    seed = hash_combine(seed, nodeAttrs.layout);
+
+    seed = hash_combine(seed, nodeAttrs.antialias);
+    seed = hash_combine(seed, nodeAttrs.cubeCoeff);
+
+    seed = get_vector_hash(seed, nodeAttrs.padBegin);
+    seed = get_vector_hash(seed, nodeAttrs.padEnd);
+
+    seed = hash_combine(seed, nodeAttrs.inPrc.getPrecVal());
+    seed = hash_combine(seed, nodeAttrs.outPrc.getPrecVal());
+
+    seed = get_vector_hash(seed, srcDims);
+    seed = get_vector_hash(seed, dstDims);
+    seed = get_vector_hash(seed, dataScales);
+
+    seed = hash_combine(seed, get_attr_hash(*attr.get()));
+    return seed;
+}
+
+// Field-by-field equality over the same members hash() consumes; each member is checked exactly once.
+bool InterpolateKey::operator==(const InterpolateKey &rhs) const {
+    if (nodeAttrs.mode != rhs.nodeAttrs.mode)
+        return false;
+    if (nodeAttrs.coordTransMode != rhs.nodeAttrs.coordTransMode)
+        return false;
+    if (nodeAttrs.nearestMode != rhs.nodeAttrs.nearestMode)
+        return false;
+    if (nodeAttrs.layout != rhs.nodeAttrs.layout)
+        return false;
+    if (nodeAttrs.antialias != rhs.nodeAttrs.antialias)
+        return false;
+    if (nodeAttrs.cubeCoeff != rhs.nodeAttrs.cubeCoeff)
+        return false;
+    if (nodeAttrs.padBegin != rhs.nodeAttrs.padBegin)
+        return false;
+    if (nodeAttrs.padEnd != rhs.nodeAttrs.padEnd)
+        return false;
+    if (nodeAttrs.inPrc != rhs.nodeAttrs.inPrc)
+        return false;
+    if (nodeAttrs.outPrc != rhs.nodeAttrs.outPrc)
+        return false;
+
+    if (srcDims != rhs.srcDims)
+        return false;
+    if (dstDims != rhs.dstDims)
+        return false;
+    if (dataScales != rhs.dataScales)
+        return false;
+    if (!(*attr.get() == *rhs.attr.get()))
+        return false;
+    return true;
+}
+
+} // namespace
+
 // shapeND: n     c     d     h    w
 // blockND: ncdhw cdhw  dhw   hw   w    1
 // index  : 0      1    2     3    4    5
@ -2027,31 +2113,63 @@ void MKLDNNInterpolateNode::prepareParams() {
        IE_THROW() << errorPrefix << " did not allocate target shape memory";
    if (!scaleMemPtr || !scaleMemPtr->GetPrimitivePtr())
        IE_THROW() << errorPrefix << " did not allocate scales memory";
-    if (getSelectedPrimitiveDescriptor() == nullptr)
+    const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
+    if (selected_pd == nullptr)
        IE_THROW() << errorPrefix << " did not set preferable primitive descriptor";

    const auto &srcDims = srcMemPtr->getStaticDims();
    const auto &dstDims = dstMemPtr->getStaticDims();
-    setPostOps(attr, dstDims, true);

    std::vector<float> dataScales = getScales(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd), dstDims);
    if (getOutputShapeAtPort(0).getRank() > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) {
        IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)";
    }
-    if ((interpAttrs.mode == InterpolateMode::nearest || interpAttrs.mode == InterpolateMode::linear_onnx || interpAttrs.mode == InterpolateMode::cubic) &&
-        ((interpAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
-            (mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == Precision::FP32))) {
-        execPtr = std::make_shared<InterpolateJitExecutor>(interpAttrs,
-                                                           srcDims,
-                                                           dstDims,
-                                                           dataScales,
-                                                           attr);
-    } else {
-        execPtr = std::make_shared<InterpolateRefExecutor>(interpAttrs,
-                                                           srcDims,
-                                                           dstDims,
-                                                           dataScales);
+
+    InterpolateKey key = {interpAttrs, srcDims, dstDims, dataScales, mkldnn::primitive_attr()};
+    setPostOps(key.attr, dstDims, true);
+
+    postOpsDataPtrs.clear();
+    auto &postOps = (*key.attr.get()).post_ops_;
+    for (int i = 0; i < postOps.len(); ++i) {
+        auto &postOp = postOps.entry_[i];
+        if (postOp.is_quantization()) {
+            auto &data = postOp.quantization.data;
+            postOpsDataPtrs.insert(postOpsDataPtrs.end(), std::begin(data), std::end(data));
+            memset(data, 0, sizeof(data));
+        } else if (postOp.is_depthwise()) {
+            auto &weights = postOp.depthwise.weights_data;
+            auto &biases = postOp.depthwise.biases_data;
+            postOpsDataPtrs.push_back(weights);
+            postOpsDataPtrs.push_back(biases);
+            weights = 0;
+            biases = 0;
+        }
    }
+
+    auto buildExecutor = [&](const InterpolateKey& key) -> std::shared_ptr<InterpolateExecutor> {
+        std::shared_ptr<InterpolateExecutor> executor;
+        if ((key.nodeAttrs.mode == InterpolateMode::nearest || key.nodeAttrs.mode == InterpolateMode::linear_onnx ||
+            key.nodeAttrs.mode == InterpolateMode::cubic) &&
+            ((key.nodeAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
+                (mayiuse(cpu::x64::avx2) && key.nodeAttrs.inPrc == Precision::FP32))) {
+            executor = std::make_shared<InterpolateJitExecutor>(key.nodeAttrs,
+                                                               key.srcDims,
+                                                               key.dstDims,
+                                                               key.dataScales,
+                                                               key.attr);
+        } else {
+            executor = std::make_shared<InterpolateRefExecutor>(key.nodeAttrs,
+                                                               key.srcDims,
+                                                               key.dstDims,
+                                                               key.dataScales);
+        }
+        return executor;
+    };
+
+    auto cache = getRuntimeCache();
+    auto result = cache->getOrCreate(key, buildExecutor);
+    execPtr = result.first;
+
    lastOutputDims = dstDims;
 }

@ -2223,13 +2341,13 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
        src_data = src_data_origin;
    }

-    execPtr->exec(src_data, dst_data);
+    execPtr->exec(src_data, dst_data, static_cast<const void*>(postOpsDataPtrs.data()));  // data() is valid even when no post-ops were collected
 }

 // for ndhwc and nCdhw8c[16c]
 // input may be f32/bf16/int8, fused->output varies
-void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
-                                                                int ID, int IH, int IW, int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
    int *index_d = static_cast<int*>(&indexTable[0]);
    int *index_h = static_cast<int*>(&indexTable[OD]);
    int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@ -2254,6 +2372,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
                arg.index = static_cast<int*>(&(index_w_kernel[0]));
                arg.work_amount = C;
                arg.oc_off = 0;
+                arg.post_op_data = post_ops_data_;
                (*interpolateKernel)(&arg);
            });
        } else {  // for blk
@ -2275,6 +2394,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
                    arg.index = static_cast<int*>(&(index_w_kernel[0]));
                    arg.work_amount = static_cast<size_t>(OW);
                    arg.oc_off = cb * blk_size * sizeof(float);
+                    arg.post_op_data = post_ops_data_;
                    (*interpolateKernel)(&arg);
                }
            });
@ -2282,8 +2402,8 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
    }  // batch end
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
-                                                             int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                             int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
    int *index_d = static_cast<int*>(&indexTable[0]);
    int *index_h = static_cast<int*>(&indexTable[OD]);
    int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@ -2308,11 +2428,12 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_p
        arg.index = static_cast<int*>(&index_kernel[0]);  // need index_h and index_w in kernel, it's in continous memory so one param
        arg.oc_off = static_cast<size_t>(c * sizeof(float));
        // work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp.
+        arg.post_op_data = post_ops_data_;
        (*interpolateKernel)(&arg);
    });
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
+void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C,
                                                                     int ID, int IH, int IW, int OD, int OH, int OW) {
    // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4,   EndTopRight:5,   EndBottomLeft:6,   EndBottomRight:7
    // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5
@ -2331,12 +2452,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8
        arg.dst = out_ptr_nc;
        arg.work_amount = OW * OH * OD;
        arg.oc_off = static_cast<size_t>(c * sizeof(float));
+        arg.post_op_data = post_ops_data_;
        (*interpolateKernel)(&arg);
    });
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
-                                                                        int OD, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                        int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
    // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD
    std::vector<int*> indexPtr(MAX_INPUT_INTERPOLATE, 0);
    std::vector<float*> weightPtr(MAX_INPUT_INTERPOLATE, 0);
@ -2401,12 +2523,14 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const ui
            arg.dst = out_ptr_ndhw;
            arg.work_amount = workAmount;
            arg.oc_off = 0;
+            arg.post_op_data = post_ops_data_;
            (*interpolateKernel)(&arg);
        }
    });
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                   int B, int C, int IH, int IW, int OH, int OW) {
    const int idxNum = 1;
    int *xOrigin = static_cast<int*>(&indexTable[0]);
    float *xFactor = reinterpret_cast<float*>(&indexTable[OW]);
@ -2447,11 +2571,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t
            // for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB
            arg.work_amount = workAmount;
            arg.oc_off = 0;
+            arg.post_op_data = post_ops_data_;
            (*interpolateKernel)(&arg);
    });
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                                                                int B, int C, int IH, int IW, int OH, int OW) {
    int tblAdvance = 0;
    int *xOrigin = static_cast<int*>(&indexTable[tblAdvance]);
    tblAdvance += OW;
@ -2481,6 +2607,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *i
        arg.weight_ptr[1] = yFactor;
        arg.work_amount = static_cast<size_t>(OW * OH);
        arg.oc_off = static_cast<size_t>(c * sizeof(float));
+        arg.post_op_data = post_ops_data_;
        (*interpolateKernel)(&arg);
    });
 }
@ -3284,7 +3411,7 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte
    }
 }

-void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
+void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
    size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
    size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];

@ -3294,25 +3421,25 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
    switch (mode) {
        case InterpolateMode::nearest: {
            if (configured_for_layout == InterpolateLayoutType::planar) {
-                NNPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                NNPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
            } else {
-                NNCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                NNCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
            }
            break;
        }
        case InterpolateMode::linear_onnx: {
            if (configured_for_layout == InterpolateLayoutType::planar) {
-                linearOnnxPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                linearOnnxPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
            } else {
-                linearOnnxCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
+                linearOnnxCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
            }
            break;
        }
        case InterpolateMode::cubic: {
            if (configured_for_layout == InterpolateLayoutType::planar) {
-                cubicPlanar(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
+                cubicPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
            } else {
-                cubicCGathered(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
+                cubicCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
            }
            break;
        }
@ -3322,7 +3449,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
    }
 }

-void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
+void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
    size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
    size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];

--- a/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h
+++ b/src/plugins/intel_cpu/src/nodes/mkldnn_interpolate_node.h
@ -68,6 +68,8 @@ struct jit_interpolate_call_args {
    void *dst;
    size_t work_amount;
    size_t oc_off;
+    // pointer to a flat array of post-op input pointers
+    const void* post_op_data;
 };

 struct jit_uni_interpolate_kernel {
@ -110,7 +112,6 @@ public:
    bool needPrepareParams() const override;
    void prepareParams() override;

-private:
    struct InterpolateAttrs {
        InterpolateMode mode;
        InterpolateCoordTransMode coordTransMode;
@ -122,7 +123,10 @@ private:
        InferenceEngine::Precision inPrc;
        InferenceEngine::Precision outPrc;
        InterpolateLayoutType layout;
-    } interpAttrs;
+    };
+
+private:
+    InterpolateAttrs interpAttrs;

    class InterpolateExecutor {
        public:
@ -131,7 +135,7 @@ private:
                                const VectorDims &dstDims,
                                const std::vector<float> &dataScales);

-            virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0;
+            virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) = 0;
            virtual ~InterpolateExecutor() = default;
            VectorDims getSrcDimPad5d() const { return srcDimPad5d; }

@ -171,20 +175,26 @@ private:
                                   const std::vector<float> &dataScales,
                                   const mkldnn::primitive_attr &attr);

-            void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
+            void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;

        private:
            // nearest neighbor
-            void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
-            void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+            void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+            void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);

            // onnx linear
-            void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
-            void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+            void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
+            void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);

            // cubic
-            void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
-            void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
+            void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int IH, int IW, int OH, int OW);
+            void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
+                int B, int C, int IH, int IW, int OH, int OW);

        private:
            std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
@ -198,7 +208,7 @@ private:
                                   const std::vector<float> &_dataScales) : dataScales(_dataScales), antialias(interpAttrs.antialias),
                InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales) {}

-            void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
+            void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;

        private:
            void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
@ -234,7 +244,8 @@ private:
    bool isAxesSpecified = false;
    std::vector<int> axes;

-    mkldnn::primitive_attr attr;
+    // 6 ptrs for each quantization, 2 ptrs for each depth_wise
+    std::vector<const void*> postOpsDataPtrs;

    std::vector<float> lastScales;
    std::vector<int32_t> lastSizes;
--- a/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
+++ b/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp
@ -374,16 +374,16 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
-        {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}},
+        {{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f}},
        defaultAxes4D.front()
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
-        {{1, 11, 6, 7}, {2, 7, 8, 7}},
+        {{1, 11, 6, 7}, {2, 7, 8, 7}, {1, 11, 6, 7}},
        defaultAxes4D.front()
    }
 };
@ -391,14 +391,14 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
 const std::vector<ShapeParams> shapeParams4D_Full = {
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{1.f, 1.f, 1.25f, 1.5f}},
        defaultAxes4D.front()
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}}},
+        InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}, {1, 11, 4, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{1, 11, 5, 6}},
        defaultAxes4D.front()
@ -638,16 +638,16 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
-        {{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}},
+        {{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f, 0.5f}},
        defaultAxes5D.front()
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
-        {{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}},
+        {{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}, {1, 11, 6, 7, 2}},
        defaultAxes5D.front()
    },
 };
@ -655,14 +655,14 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
 const std::vector<ShapeParams> shapeParams5D_Full = {
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{1.f, 1.f, 1.25f, 1.5f, 0.5f}},
        defaultAxes5D.front()
    },
    ShapeParams{
        ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
-        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}}},
+        InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}, {1, 11, 4, 4, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{1, 11, 5, 6, 4}},
        defaultAxes5D.front()