[CPU] Interpolate runtime params cache (#9524)

This commit is contained in:
Chenhu Wang 2022-01-13 15:01:02 +08:00 committed by GitHub
parent f562e5572f
commit ac45196ce2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 203 additions and 64 deletions

View File

@ -1934,6 +1934,7 @@ void MKLDNNEltwiseNode::prepareParams() {
// together with the corresponding appendPostOps method to pass the scales and shifts pointers at runtime.
// Until then we have to read them from the quantization_t directly, store them somewhere
// and nullify them to get rid of the address dependency in the key structure
fqDataPtrs.clear();
for (int i = 0; i < key.postOps.len(); ++i) {
auto &data = key.postOps.get()->entry_[i].quantization.data;
fqDataPtrs.insert(fqDataPtrs.end(), std::begin(data), std::end(data));

View File

@ -80,8 +80,10 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
this->preamble();
if (attr_.post_ops_.len() != 0)
if (attr_.post_ops_.len() != 0) {
mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]);
mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]);
}
if (isa == cpu::x64::avx512_common)
uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
@ -183,7 +185,8 @@ private:
Reg64 reg_tmp_64 = r10;
Xbyak::Reg64 reg_oc_off = rax;
Xbyak::Reg64 reg_d_weights = rbx;
Xbyak::Reg64 reg_post_ops_data = rbx;
Xbyak::Reg64 reg_d_weights = reg_tmp_64;
Xbyak::Reg64 reg_d_bias = rcx;
Xbyak::Reg32 reg_index_offset = edx;
@ -1569,16 +1572,20 @@ private:
int eltwise_inj_idx = 0;
int depthwise_inj_idx = 0;
int quantization_inj_idx = 0;
int post_ops_data_offset = 0;
for (int i = 0; i < p.len(); i++) {
auto& post_op = p.entry_[i];
if (post_op.is_eltwise()) {
eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1);
eltwise_inj_idx++;
} else if (post_op.is_depthwise()) {
mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);
add(reg_d_weights, reg_oc_off);
post_ops_data_offset += sizeof(float*);
mov(reg_d_bias, ptr[reg_post_ops_data + post_ops_data_offset]);
add(reg_d_bias, reg_oc_off);
post_ops_data_offset += sizeof(float*);
// weight and bias is padded. scalar as vector.
depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast);
depthwise_inj_idx++;
@ -1588,23 +1595,102 @@ private:
int s_idx = vmm_val.getIdx();
quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off);
quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);
quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off);
quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);
if (do_dequantization) {
quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off);
quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
}
post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
quantization_inj_idx++;
}
}
}
};
namespace {
// Key used to look up an Interpolate executor in the runtime cache
// (see the getOrCreate(key, buildExecutor) call in prepareParams()).
// Every member takes part in both hash() and operator==.
// NOTE: the member order matters — prepareParams() brace-initializes the key positionally.
struct InterpolateKey {
// interpolation attributes of the node (mode, coordTransMode, nearestMode, layout,
// antialias, cubeCoeff, padBegin/padEnd, inPrc/outPrc)
MKLDNNInterpolateNode::InterpolateAttrs nodeAttrs;
// static dims of the source memory
VectorDims srcDims;
// static dims of the destination memory
VectorDims dstDims;
// scales as returned by getScales() for the padded input shape and dstDims
std::vector<float> dataScales;
// primitive attributes carrying the post ops; prepareParams() zeroes the
// quantization/depthwise data pointers stored here before the cache lookup
mkldnn::primitive_attr attr;
// combines all members into a single hash value
size_t hash() const;
// member-wise equality over the same set of fields that hash() covers
bool operator==(const InterpolateKey& rhs) const;
};
// Computes the hash of the key by folding each field into an accumulator,
// in a fixed order: node attributes, then shapes and scales, then the
// primitive attributes.
size_t InterpolateKey::hash() const {
    using namespace dnnl::impl;
    using namespace dnnl::impl::primitive_hashing;

    const auto& na = nodeAttrs;
    size_t acc = 0;

    // node attributes: scalar fields first, then the pad vectors and precisions
    acc = hash_combine(acc, na.mode);
    acc = hash_combine(acc, na.coordTransMode);
    acc = hash_combine(acc, na.nearestMode);
    acc = hash_combine(acc, na.layout);
    acc = hash_combine(acc, na.antialias);
    acc = hash_combine(acc, na.cubeCoeff);
    acc = get_vector_hash(acc, na.padBegin);
    acc = get_vector_hash(acc, na.padEnd);
    acc = hash_combine(acc, na.inPrc.getPrecVal());
    acc = hash_combine(acc, na.outPrc.getPrecVal());

    // shapes and scales
    acc = get_vector_hash(acc, srcDims);
    acc = get_vector_hash(acc, dstDims);
    acc = get_vector_hash(acc, dataScales);

    // primitive attributes (post ops)
    acc = hash_combine(acc, get_attr_hash(*attr.get()));

    return acc;
}
// Member-wise equality of two cache keys.
// Compares exactly the fields that hash() folds in: the node attributes,
// source/destination dims, data scales and the primitive attributes.
// Returns true only when all of them match.
bool InterpolateKey::operator==(const InterpolateKey &rhs) const {
    const auto &lhsAttrs = nodeAttrs;
    const auto &rhsAttrs = rhs.nodeAttrs;

    // interpolation algorithm settings (layout is checked once here; the original
    // code repeated the same layout comparison a second time further down)
    if (lhsAttrs.mode != rhsAttrs.mode ||
        lhsAttrs.coordTransMode != rhsAttrs.coordTransMode ||
        lhsAttrs.nearestMode != rhsAttrs.nearestMode ||
        lhsAttrs.layout != rhsAttrs.layout ||
        lhsAttrs.antialias != rhsAttrs.antialias ||
        lhsAttrs.cubeCoeff != rhsAttrs.cubeCoeff)
        return false;

    // paddings and precisions
    if (lhsAttrs.padBegin != rhsAttrs.padBegin ||
        lhsAttrs.padEnd != rhsAttrs.padEnd ||
        lhsAttrs.inPrc != rhsAttrs.inPrc ||
        lhsAttrs.outPrc != rhsAttrs.outPrc)
        return false;

    // shapes and scales
    if (srcDims != rhs.srcDims ||
        dstDims != rhs.dstDims ||
        dataScales != rhs.dataScales)
        return false;

    // primitive attributes (post ops); only operator== is used on the underlying attr type
    return *attr.get() == *rhs.attr.get();
}
} // namespace
// shapeND: n c d h w
// blockND: ncdhw cdhw dhw hw w 1
// index : 0 1 2 3 4 5
@ -2027,31 +2113,63 @@ void MKLDNNInterpolateNode::prepareParams() {
IE_THROW() << errorPrefix << " did not allocate target shape memory";
if (!scaleMemPtr || !scaleMemPtr->GetPrimitivePtr())
IE_THROW() << errorPrefix << " did not allocate scales memory";
if (getSelectedPrimitiveDescriptor() == nullptr)
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << errorPrefix << " did not set preferable primitive descriptor";
const auto &srcDims = srcMemPtr->getStaticDims();
const auto &dstDims = dstMemPtr->getStaticDims();
setPostOps(attr, dstDims, true);
std::vector<float> dataScales = getScales(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd), dstDims);
if (getOutputShapeAtPort(0).getRank() > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) {
IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)";
}
if ((interpAttrs.mode == InterpolateMode::nearest || interpAttrs.mode == InterpolateMode::linear_onnx || interpAttrs.mode == InterpolateMode::cubic) &&
((interpAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
(mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == Precision::FP32))) {
execPtr = std::make_shared<InterpolateJitExecutor>(interpAttrs,
srcDims,
dstDims,
dataScales,
attr);
} else {
execPtr = std::make_shared<InterpolateRefExecutor>(interpAttrs,
srcDims,
dstDims,
dataScales);
InterpolateKey key = {interpAttrs, srcDims, dstDims, dataScales, mkldnn::primitive_attr()};
setPostOps(key.attr, dstDims, true);
postOpsDataPtrs.clear();
auto &postOps = (*key.attr.get()).post_ops_;
for (int i = 0; i < postOps.len(); ++i) {
auto &postOp = postOps.entry_[i];
if (postOp.is_quantization()) {
auto &data = postOp.quantization.data;
postOpsDataPtrs.insert(postOpsDataPtrs.end(), std::begin(data), std::end(data));
memset(data, 0, sizeof(data));
} else if (postOp.is_depthwise()) {
auto &weights = postOp.depthwise.weights_data;
auto &biases = postOp.depthwise.biases_data;
postOpsDataPtrs.push_back(weights);
postOpsDataPtrs.push_back(biases);
weights = 0;
biases = 0;
}
}
auto buildExecutor = [&](const InterpolateKey& key) -> std::shared_ptr<InterpolateExecutor> {
std::shared_ptr<InterpolateExecutor> executor;
if ((key.nodeAttrs.mode == InterpolateMode::nearest || key.nodeAttrs.mode == InterpolateMode::linear_onnx ||
key.nodeAttrs.mode == InterpolateMode::cubic) &&
((key.nodeAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
(mayiuse(cpu::x64::avx2) && key.nodeAttrs.inPrc == Precision::FP32))) {
executor = std::make_shared<InterpolateJitExecutor>(key.nodeAttrs,
key.srcDims,
key.dstDims,
key.dataScales,
key.attr);
} else {
executor = std::make_shared<InterpolateRefExecutor>(key.nodeAttrs,
key.srcDims,
key.dstDims,
key.dataScales);
}
return executor;
};
auto cache = getRuntimeCache();
auto result = cache->getOrCreate(key, buildExecutor);
execPtr = result.first;
lastOutputDims = dstDims;
}
@ -2223,13 +2341,13 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
src_data = src_data_origin;
}
execPtr->exec(src_data, dst_data);
execPtr->exec(src_data, dst_data, static_cast<const void*>(&postOpsDataPtrs[0]));
}
// for ndhwc and nCdhw8c[16c]
// input may be f32/bf16/int8, fused->output varies
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
int ID, int IH, int IW, int OD, int OH, int OW) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
int *index_d = static_cast<int*>(&indexTable[0]);
int *index_h = static_cast<int*>(&indexTable[OD]);
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@ -2254,6 +2372,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
arg.index = static_cast<int*>(&(index_w_kernel[0]));
arg.work_amount = C;
arg.oc_off = 0;
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
});
} else { // for blk
@ -2275,6 +2394,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
arg.index = static_cast<int*>(&(index_w_kernel[0]));
arg.work_amount = static_cast<size_t>(OW);
arg.oc_off = cb * blk_size * sizeof(float);
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
}
});
@ -2282,8 +2402,8 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
} // batch end
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
int OD, int OH, int OW) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
int *index_d = static_cast<int*>(&indexTable[0]);
int *index_h = static_cast<int*>(&indexTable[OD]);
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
@ -2308,11 +2428,12 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_p
arg.index = static_cast<int*>(&index_kernel[0]); // need index_h and index_w in kernel, it's in continous memory so one param
arg.oc_off = static_cast<size_t>(c * sizeof(float));
// work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp.
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
});
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C,
int ID, int IH, int IW, int OD, int OH, int OW) {
// FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7
// weight: Left:0, right:1, top:2, bottom:3, front:4, end:5
@ -2331,12 +2452,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8
arg.dst = out_ptr_nc;
arg.work_amount = OW * OH * OD;
arg.oc_off = static_cast<size_t>(c * sizeof(float));
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
});
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
int OD, int OH, int OW) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
// left:OW right:OW Top:OH Bottom:OH Front:OD End:OD
std::vector<int*> indexPtr(MAX_INPUT_INTERPOLATE, 0);
std::vector<float*> weightPtr(MAX_INPUT_INTERPOLATE, 0);
@ -2401,12 +2523,14 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const ui
arg.dst = out_ptr_ndhw;
arg.work_amount = workAmount;
arg.oc_off = 0;
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
}
});
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int IH, int IW, int OH, int OW) {
const int idxNum = 1;
int *xOrigin = static_cast<int*>(&indexTable[0]);
float *xFactor = reinterpret_cast<float*>(&indexTable[OW]);
@ -2447,11 +2571,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t
// for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB
arg.work_amount = workAmount;
arg.oc_off = 0;
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
});
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int IH, int IW, int OH, int OW) {
int tblAdvance = 0;
int *xOrigin = static_cast<int*>(&indexTable[tblAdvance]);
tblAdvance += OW;
@ -2481,6 +2607,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *i
arg.weight_ptr[1] = yFactor;
arg.work_amount = static_cast<size_t>(OW * OH);
arg.oc_off = static_cast<size_t>(c * sizeof(float));
arg.post_op_data = post_ops_data_;
(*interpolateKernel)(&arg);
});
}
@ -3284,7 +3411,7 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte
}
}
void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
@ -3294,25 +3421,25 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
switch (mode) {
case InterpolateMode::nearest: {
if (configured_for_layout == InterpolateLayoutType::planar) {
NNPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
NNPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
} else {
NNCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
NNCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
}
break;
}
case InterpolateMode::linear_onnx: {
if (configured_for_layout == InterpolateLayoutType::planar) {
linearOnnxPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
linearOnnxPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
} else {
linearOnnxCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
linearOnnxCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
}
break;
}
case InterpolateMode::cubic: {
if (configured_for_layout == InterpolateLayoutType::planar) {
cubicPlanar(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
cubicPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
} else {
cubicCGathered(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
cubicCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
}
break;
}
@ -3322,7 +3449,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
}
}
void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];

View File

@ -68,6 +68,8 @@ struct jit_interpolate_call_args {
void *dst;
size_t work_amount;
size_t oc_off;
//ptr to array of post op inputs pointers (flat list)
const void* post_op_data;
};
struct jit_uni_interpolate_kernel {
@ -110,7 +112,6 @@ public:
bool needPrepareParams() const override;
void prepareParams() override;
private:
struct InterpolateAttrs {
InterpolateMode mode;
InterpolateCoordTransMode coordTransMode;
@ -122,7 +123,10 @@ private:
InferenceEngine::Precision inPrc;
InferenceEngine::Precision outPrc;
InterpolateLayoutType layout;
} interpAttrs;
};
private:
InterpolateAttrs interpAttrs;
class InterpolateExecutor {
public:
@ -131,7 +135,7 @@ private:
const VectorDims &dstDims,
const std::vector<float> &dataScales);
virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0;
virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) = 0;
virtual ~InterpolateExecutor() = default;
VectorDims getSrcDimPad5d() const { return srcDimPad5d; }
@ -171,20 +175,26 @@ private:
const std::vector<float> &dataScales,
const mkldnn::primitive_attr &attr);
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
private:
// nearest neighbor
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
// onnx linear
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
// cubic
void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int IH, int IW, int OH, int OW);
void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
int B, int C, int IH, int IW, int OH, int OW);
private:
std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
@ -198,7 +208,7 @@ private:
const std::vector<float> &_dataScales) : dataScales(_dataScales), antialias(interpAttrs.antialias),
InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales) {}
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
private:
void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
@ -234,7 +244,8 @@ private:
bool isAxesSpecified = false;
std::vector<int> axes;
mkldnn::primitive_attr attr;
// 6 ptrs for each quantization, 2 ptrs for each depth_wise
std::vector<const void*> postOpsDataPtrs;
std::vector<float> lastScales;
std::vector<int32_t> lastSizes;

View File

@ -374,16 +374,16 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}},
{{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f}},
defaultAxes4D.front()
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1, 11, 6, 7}, {2, 7, 8, 7}},
{{1, 11, 6, 7}, {2, 7, 8, 7}, {1, 11, 6, 7}},
defaultAxes4D.front()
}
};
@ -391,14 +391,14 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
const std::vector<ShapeParams> shapeParams4D_Full = {
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1.f, 1.f, 1.25f, 1.5f}},
defaultAxes4D.front()
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}}},
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}, {1, 11, 4, 4}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1, 11, 5, 6}},
defaultAxes4D.front()
@ -638,16 +638,16 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}},
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f, 0.5f}},
defaultAxes5D.front()
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
ngraph::helpers::InputLayerType::PARAMETER,
{{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}},
{{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}, {1, 11, 6, 7, 2}},
defaultAxes5D.front()
},
};
@ -655,14 +655,14 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
const std::vector<ShapeParams> shapeParams5D_Full = {
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}},
defaultAxes5D.front()
},
ShapeParams{
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}}},
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}, {1, 11, 4, 4, 4}}},
ngraph::helpers::InputLayerType::CONSTANT,
{{1, 11, 5, 6, 4}},
defaultAxes5D.front()