[CPU] Interpolate runtime params cache (#9524)
This commit is contained in:
parent
f562e5572f
commit
ac45196ce2
@ -1934,6 +1934,7 @@ void MKLDNNEltwiseNode::prepareParams() {
|
||||
// together with the corresponding appendPostOps method to pass the scales and shifts pointers at runtime.
|
||||
// Until then we have to read them from the quantization_t directly, store them somewhere
|
||||
// and nullify them to get rid of the address dependency in the key structure
|
||||
fqDataPtrs.clear();
|
||||
for (int i = 0; i < key.postOps.len(); ++i) {
|
||||
auto &data = key.postOps.get()->entry_[i].quantization.data;
|
||||
fqDataPtrs.insert(fqDataPtrs.end(), std::begin(data), std::end(data));
|
||||
|
@ -80,8 +80,10 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
|
||||
|
||||
this->preamble();
|
||||
|
||||
if (attr_.post_ops_.len() != 0)
|
||||
if (attr_.post_ops_.len() != 0) {
|
||||
mov(reg_post_ops_data, ptr[reg_params + GET_OFF(post_op_data)]);
|
||||
mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]);
|
||||
}
|
||||
if (isa == cpu::x64::avx512_common)
|
||||
uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
|
||||
|
||||
@ -183,7 +185,8 @@ private:
|
||||
Reg64 reg_tmp_64 = r10;
|
||||
|
||||
Xbyak::Reg64 reg_oc_off = rax;
|
||||
Xbyak::Reg64 reg_d_weights = rbx;
|
||||
Xbyak::Reg64 reg_post_ops_data = rbx;
|
||||
Xbyak::Reg64 reg_d_weights = reg_tmp_64;
|
||||
Xbyak::Reg64 reg_d_bias = rcx;
|
||||
Xbyak::Reg32 reg_index_offset = edx;
|
||||
|
||||
@ -1569,16 +1572,20 @@ private:
|
||||
int eltwise_inj_idx = 0;
|
||||
int depthwise_inj_idx = 0;
|
||||
int quantization_inj_idx = 0;
|
||||
int post_ops_data_offset = 0;
|
||||
for (int i = 0; i < p.len(); i++) {
|
||||
auto& post_op = p.entry_[i];
|
||||
if (post_op.is_eltwise()) {
|
||||
eltwise_injectors[eltwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1);
|
||||
eltwise_inj_idx++;
|
||||
} else if (post_op.is_depthwise()) {
|
||||
mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
|
||||
mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
|
||||
mov(reg_d_weights, ptr[reg_post_ops_data + post_ops_data_offset]);
|
||||
add(reg_d_weights, reg_oc_off);
|
||||
post_ops_data_offset += sizeof(float*);
|
||||
mov(reg_d_bias, ptr[reg_post_ops_data + post_ops_data_offset]);
|
||||
add(reg_d_bias, reg_oc_off);
|
||||
post_ops_data_offset += sizeof(float*);
|
||||
|
||||
// weight and bias is padded. scalar as vector.
|
||||
depthwise_injectors[depthwise_inj_idx]->compute_vector_range(vmm_val.getIdx(), vmm_val.getIdx() + 1, reg_d_weights, reg_d_bias, is_broadcast);
|
||||
depthwise_inj_idx++;
|
||||
@ -1588,23 +1595,102 @@ private:
|
||||
|
||||
int s_idx = vmm_val.getIdx();
|
||||
|
||||
quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->init_crop_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->compute_crop(s_idx, s_idx + 1, 0, 0, is_broadcast);
|
||||
|
||||
quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->init_input_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->compute_input_scale_shift(s_idx, s_idx + 1, 0, do_rounding, 0, is_broadcast);
|
||||
|
||||
if (do_dequantization) {
|
||||
quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->init_output_scale_shift_ptrs(reg_post_ops_data + post_ops_data_offset, reg_oc_off);
|
||||
quantization_injectors[quantization_inj_idx]->compute_output_scale_shift(s_idx, s_idx + 1, 0, 0, is_broadcast);
|
||||
}
|
||||
|
||||
post_ops_data_offset += quantization_injectors[quantization_inj_idx]->memoryStep();
|
||||
quantization_inj_idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
struct InterpolateKey {
|
||||
MKLDNNInterpolateNode::InterpolateAttrs nodeAttrs;
|
||||
VectorDims srcDims;
|
||||
VectorDims dstDims;
|
||||
std::vector<float> dataScales;
|
||||
mkldnn::primitive_attr attr;
|
||||
|
||||
size_t hash() const;
|
||||
bool operator==(const InterpolateKey& rhs) const;
|
||||
};
|
||||
|
||||
size_t InterpolateKey::hash() const {
|
||||
using namespace dnnl::impl;
|
||||
using namespace dnnl::impl::primitive_hashing;
|
||||
|
||||
size_t seed = 0;
|
||||
|
||||
seed = hash_combine(seed, nodeAttrs.mode);
|
||||
seed = hash_combine(seed, nodeAttrs.coordTransMode);
|
||||
seed = hash_combine(seed, nodeAttrs.nearestMode);
|
||||
seed = hash_combine(seed, nodeAttrs.layout);
|
||||
|
||||
seed = hash_combine(seed, nodeAttrs.antialias);
|
||||
seed = hash_combine(seed, nodeAttrs.cubeCoeff);
|
||||
|
||||
seed = get_vector_hash(seed, nodeAttrs.padBegin);
|
||||
seed = get_vector_hash(seed, nodeAttrs.padEnd);
|
||||
|
||||
seed = hash_combine(seed, nodeAttrs.inPrc.getPrecVal());
|
||||
seed = hash_combine(seed, nodeAttrs.outPrc.getPrecVal());
|
||||
|
||||
seed = get_vector_hash(seed, srcDims);
|
||||
seed = get_vector_hash(seed, dstDims);
|
||||
seed = get_vector_hash(seed, dataScales);
|
||||
|
||||
seed = hash_combine(seed, get_attr_hash(*attr.get()));
|
||||
return seed;
|
||||
}
|
||||
|
||||
// Field-by-field equality of two keys.
// Compares the same set of fields that hash() folds into its seed: the node
// attributes, the source/destination dimensions, the data scales and the
// primitive attributes. Returns false on the first mismatch, true otherwise.
bool InterpolateKey::operator==(const InterpolateKey &rhs) const {
    // Node attributes. Each attribute is checked exactly once
    // (the layout used to be compared twice; the duplicate check was redundant).
    if (nodeAttrs.mode != rhs.nodeAttrs.mode)
        return false;
    if (nodeAttrs.coordTransMode != rhs.nodeAttrs.coordTransMode)
        return false;
    if (nodeAttrs.nearestMode != rhs.nodeAttrs.nearestMode)
        return false;
    if (nodeAttrs.layout != rhs.nodeAttrs.layout)
        return false;
    if (nodeAttrs.antialias != rhs.nodeAttrs.antialias)
        return false;
    if (nodeAttrs.cubeCoeff != rhs.nodeAttrs.cubeCoeff)
        return false;
    if (nodeAttrs.padBegin != rhs.nodeAttrs.padBegin)
        return false;
    if (nodeAttrs.padEnd != rhs.nodeAttrs.padEnd)
        return false;
    if (nodeAttrs.inPrc != rhs.nodeAttrs.inPrc)
        return false;
    if (nodeAttrs.outPrc != rhs.nodeAttrs.outPrc)
        return false;

    // Shapes and scales.
    if (srcDims != rhs.srcDims)
        return false;
    if (dstDims != rhs.dstDims)
        return false;
    if (dataScales != rhs.dataScales)
        return false;

    // Primitive attributes are compared through the underlying attribute objects.
    if (!(*attr.get() == *rhs.attr.get()))
        return false;

    return true;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// shapeND: n c d h w
|
||||
// blockND: ncdhw cdhw dhw hw w 1
|
||||
// index : 0 1 2 3 4 5
|
||||
@ -2027,31 +2113,63 @@ void MKLDNNInterpolateNode::prepareParams() {
|
||||
IE_THROW() << errorPrefix << " did not allocate target shape memory";
|
||||
if (!scaleMemPtr || !scaleMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << errorPrefix << " did not allocate scales memory";
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr)
|
||||
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
|
||||
if (selected_pd == nullptr)
|
||||
IE_THROW() << errorPrefix << " did not set preferable primitive descriptor";
|
||||
|
||||
const auto &srcDims = srcMemPtr->getStaticDims();
|
||||
const auto &dstDims = dstMemPtr->getStaticDims();
|
||||
setPostOps(attr, dstDims, true);
|
||||
|
||||
std::vector<float> dataScales = getScales(getPaddedInputShape(srcDims, interpAttrs.padBegin, interpAttrs.padEnd), dstDims);
|
||||
if (getOutputShapeAtPort(0).getRank() > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) {
|
||||
IE_THROW() << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)";
|
||||
}
|
||||
if ((interpAttrs.mode == InterpolateMode::nearest || interpAttrs.mode == InterpolateMode::linear_onnx || interpAttrs.mode == InterpolateMode::cubic) &&
|
||||
((interpAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
|
||||
(mayiuse(cpu::x64::avx2) && interpAttrs.inPrc == Precision::FP32))) {
|
||||
execPtr = std::make_shared<InterpolateJitExecutor>(interpAttrs,
|
||||
srcDims,
|
||||
dstDims,
|
||||
dataScales,
|
||||
attr);
|
||||
} else {
|
||||
execPtr = std::make_shared<InterpolateRefExecutor>(interpAttrs,
|
||||
srcDims,
|
||||
dstDims,
|
||||
dataScales);
|
||||
|
||||
InterpolateKey key = {interpAttrs, srcDims, dstDims, dataScales, mkldnn::primitive_attr()};
|
||||
setPostOps(key.attr, dstDims, true);
|
||||
|
||||
postOpsDataPtrs.clear();
|
||||
auto &postOps = (*key.attr.get()).post_ops_;
|
||||
for (int i = 0; i < postOps.len(); ++i) {
|
||||
auto &postOp = postOps.entry_[i];
|
||||
if (postOp.is_quantization()) {
|
||||
auto &data = postOp.quantization.data;
|
||||
postOpsDataPtrs.insert(postOpsDataPtrs.end(), std::begin(data), std::end(data));
|
||||
memset(data, 0, sizeof(data));
|
||||
} else if (postOp.is_depthwise()) {
|
||||
auto &weights = postOp.depthwise.weights_data;
|
||||
auto &biases = postOp.depthwise.biases_data;
|
||||
postOpsDataPtrs.push_back(weights);
|
||||
postOpsDataPtrs.push_back(biases);
|
||||
weights = 0;
|
||||
biases = 0;
|
||||
}
|
||||
}
|
||||
|
||||
auto buildExecutor = [&](const InterpolateKey& key) -> std::shared_ptr<InterpolateExecutor> {
|
||||
std::shared_ptr<InterpolateExecutor> executor;
|
||||
if ((key.nodeAttrs.mode == InterpolateMode::nearest || key.nodeAttrs.mode == InterpolateMode::linear_onnx ||
|
||||
key.nodeAttrs.mode == InterpolateMode::cubic) &&
|
||||
((key.nodeAttrs.layout != InterpolateLayoutType::planar && mayiuse(cpu::x64::sse41)) ||
|
||||
(mayiuse(cpu::x64::avx2) && key.nodeAttrs.inPrc == Precision::FP32))) {
|
||||
executor = std::make_shared<InterpolateJitExecutor>(key.nodeAttrs,
|
||||
key.srcDims,
|
||||
key.dstDims,
|
||||
key.dataScales,
|
||||
key.attr);
|
||||
} else {
|
||||
executor = std::make_shared<InterpolateRefExecutor>(key.nodeAttrs,
|
||||
key.srcDims,
|
||||
key.dstDims,
|
||||
key.dataScales);
|
||||
}
|
||||
return executor;
|
||||
};
|
||||
|
||||
auto cache = getRuntimeCache();
|
||||
auto result = cache->getOrCreate(key, buildExecutor);
|
||||
execPtr = result.first;
|
||||
|
||||
lastOutputDims = dstDims;
|
||||
}
|
||||
|
||||
@ -2223,13 +2341,13 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
src_data = src_data_origin;
|
||||
}
|
||||
|
||||
execPtr->exec(src_data, dst_data);
|
||||
execPtr->exec(src_data, dst_data, static_cast<const void*>(&postOpsDataPtrs[0]));
|
||||
}
|
||||
|
||||
// for ndhwc and nCdhw8c[16c]
|
||||
// input may be f32/bf16/int8, fused->output varies
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
|
||||
int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
int *index_d = static_cast<int*>(&indexTable[0]);
|
||||
int *index_h = static_cast<int*>(&indexTable[OD]);
|
||||
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
|
||||
@ -2254,6 +2372,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
|
||||
arg.index = static_cast<int*>(&(index_w_kernel[0]));
|
||||
arg.work_amount = C;
|
||||
arg.oc_off = 0;
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
} else { // for blk
|
||||
@ -2275,6 +2394,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
|
||||
arg.index = static_cast<int*>(&(index_w_kernel[0]));
|
||||
arg.work_amount = static_cast<size_t>(OW);
|
||||
arg.oc_off = cb * blk_size * sizeof(float);
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
}
|
||||
});
|
||||
@ -2282,8 +2402,8 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i
|
||||
} // batch end
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
int *index_d = static_cast<int*>(&indexTable[0]);
|
||||
int *index_h = static_cast<int*>(&indexTable[OD]);
|
||||
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
|
||||
@ -2308,11 +2428,12 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_p
|
||||
arg.index = static_cast<int*>(&index_kernel[0]); // need index_h and index_w in kernel, it's in continous memory so one param
|
||||
arg.oc_off = static_cast<size_t>(c * sizeof(float));
|
||||
// work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp.
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C,
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C,
|
||||
int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
// FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7
|
||||
// weight: Left:0, right:1, top:2, bottom:3, front:4, end:5
|
||||
@ -2331,12 +2452,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8
|
||||
arg.dst = out_ptr_nc;
|
||||
arg.work_amount = OW * OH * OD;
|
||||
arg.oc_off = static_cast<size_t>(c * sizeof(float));
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
// left:OW right:OW Top:OH Bottom:OH Front:OD End:OD
|
||||
std::vector<int*> indexPtr(MAX_INPUT_INTERPOLATE, 0);
|
||||
std::vector<float*> weightPtr(MAX_INPUT_INTERPOLATE, 0);
|
||||
@ -2401,12 +2523,14 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const ui
|
||||
arg.dst = out_ptr_ndhw;
|
||||
arg.work_amount = workAmount;
|
||||
arg.oc_off = 0;
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int IH, int IW, int OH, int OW) {
|
||||
const int idxNum = 1;
|
||||
int *xOrigin = static_cast<int*>(&indexTable[0]);
|
||||
float *xFactor = reinterpret_cast<float*>(&indexTable[OW]);
|
||||
@ -2447,11 +2571,13 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t
|
||||
// for blk, src + IW*IH*blkSize, dst + OW*OH*blkSize, process the blkSize on next CB
|
||||
arg.work_amount = workAmount;
|
||||
arg.oc_off = 0;
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int IH, int IW, int OH, int OW) {
|
||||
int tblAdvance = 0;
|
||||
int *xOrigin = static_cast<int*>(&indexTable[tblAdvance]);
|
||||
tblAdvance += OW;
|
||||
@ -2481,6 +2607,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *i
|
||||
arg.weight_ptr[1] = yFactor;
|
||||
arg.work_amount = static_cast<size_t>(OW * OH);
|
||||
arg.oc_off = static_cast<size_t>(c * sizeof(float));
|
||||
arg.post_op_data = post_ops_data_;
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
}
|
||||
@ -3284,7 +3411,7 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
|
||||
void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
|
||||
size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
|
||||
@ -3294,25 +3421,25 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
|
||||
switch (mode) {
|
||||
case InterpolateMode::nearest: {
|
||||
if (configured_for_layout == InterpolateLayoutType::planar) {
|
||||
NNPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
NNPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
} else {
|
||||
NNCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
NNCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case InterpolateMode::linear_onnx: {
|
||||
if (configured_for_layout == InterpolateLayoutType::planar) {
|
||||
linearOnnxPlanar(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
linearOnnxPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
} else {
|
||||
linearOnnxCGathered(in_ptr_, out_ptr_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
linearOnnxCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, ID, IH, IW, OD, OH, OW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case InterpolateMode::cubic: {
|
||||
if (configured_for_layout == InterpolateLayoutType::planar) {
|
||||
cubicPlanar(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
|
||||
cubicPlanar(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
|
||||
} else {
|
||||
cubicCGathered(in_ptr_, out_ptr_, N, C, IH, IW, OH, OW);
|
||||
cubicCGathered(in_ptr_, out_ptr_, post_ops_data_, N, C, IH, IW, OH, OW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -3322,7 +3449,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_,
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) {
|
||||
void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) {
|
||||
size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
|
||||
|
@ -68,6 +68,8 @@ struct jit_interpolate_call_args {
|
||||
void *dst;
|
||||
size_t work_amount;
|
||||
size_t oc_off;
|
||||
//ptr to array of post op inputs pointers (flat list)
|
||||
const void* post_op_data;
|
||||
};
|
||||
|
||||
struct jit_uni_interpolate_kernel {
|
||||
@ -110,7 +112,6 @@ public:
|
||||
bool needPrepareParams() const override;
|
||||
void prepareParams() override;
|
||||
|
||||
private:
|
||||
struct InterpolateAttrs {
|
||||
InterpolateMode mode;
|
||||
InterpolateCoordTransMode coordTransMode;
|
||||
@ -122,7 +123,10 @@ private:
|
||||
InferenceEngine::Precision inPrc;
|
||||
InferenceEngine::Precision outPrc;
|
||||
InterpolateLayoutType layout;
|
||||
} interpAttrs;
|
||||
};
|
||||
|
||||
private:
|
||||
InterpolateAttrs interpAttrs;
|
||||
|
||||
class InterpolateExecutor {
|
||||
public:
|
||||
@ -131,7 +135,7 @@ private:
|
||||
const VectorDims &dstDims,
|
||||
const std::vector<float> &dataScales);
|
||||
|
||||
virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0;
|
||||
virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) = 0;
|
||||
virtual ~InterpolateExecutor() = default;
|
||||
VectorDims getSrcDimPad5d() const { return srcDimPad5d; }
|
||||
|
||||
@ -171,20 +175,26 @@ private:
|
||||
const std::vector<float> &dataScales,
|
||||
const mkldnn::primitive_attr &attr);
|
||||
|
||||
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
|
||||
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
|
||||
|
||||
private:
|
||||
// nearest neighbor
|
||||
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
|
||||
// onnx linear
|
||||
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
|
||||
// cubic
|
||||
void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
|
||||
void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
|
||||
void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int IH, int IW, int OH, int OW);
|
||||
void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_,
|
||||
int B, int C, int IH, int IW, int OH, int OW);
|
||||
|
||||
private:
|
||||
std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
|
||||
@ -198,7 +208,7 @@ private:
|
||||
const std::vector<float> &_dataScales) : dataScales(_dataScales), antialias(interpAttrs.antialias),
|
||||
InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales) {}
|
||||
|
||||
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;
|
||||
void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) override;
|
||||
|
||||
private:
|
||||
void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
@ -234,7 +244,8 @@ private:
|
||||
bool isAxesSpecified = false;
|
||||
std::vector<int> axes;
|
||||
|
||||
mkldnn::primitive_attr attr;
|
||||
// 6 ptrs for each quantization, 2 ptrs for each depth_wise
|
||||
std::vector<const void*> postOpsDataPtrs;
|
||||
|
||||
std::vector<float> lastScales;
|
||||
std::vector<int32_t> lastSizes;
|
||||
|
@ -374,16 +374,16 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::PARAMETER,
|
||||
{{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}},
|
||||
{{1.f, 1.f, 1.25f, 1.5f}, {1.f, 1.f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f}},
|
||||
defaultAxes4D.front()
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::PARAMETER,
|
||||
{{1, 11, 6, 7}, {2, 7, 8, 7}},
|
||||
{{1, 11, 6, 7}, {2, 7, 8, 7}, {1, 11, 6, 7}},
|
||||
defaultAxes4D.front()
|
||||
}
|
||||
};
|
||||
@ -391,14 +391,14 @@ const std::vector<ShapeParams> shapeParams4D_Smoke = {
|
||||
const std::vector<ShapeParams> shapeParams4D_Full = {
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {2, 7, 6, 5}, {1, 11, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::CONSTANT,
|
||||
{{1.f, 1.f, 1.25f, 1.5f}},
|
||||
defaultAxes4D.front()
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1}, {{1, 11, 4, 4}, {1, 11, 5, 5}, {1, 11, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::CONSTANT,
|
||||
{{1, 11, 5, 6}},
|
||||
defaultAxes4D.front()
|
||||
@ -638,16 +638,16 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::PARAMETER,
|
||||
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}},
|
||||
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}, {1.f, 1.f, 1.25f, 1.25f, 1.25f}, {1.f, 1.f, 1.25f, 1.5f, 0.5f}},
|
||||
defaultAxes5D.front()
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::PARAMETER,
|
||||
{{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}},
|
||||
{{1, 11, 6, 7, 2}, {2, 7, 8, 7, 4}, {1, 11, 6, 7, 2}},
|
||||
defaultAxes5D.front()
|
||||
},
|
||||
};
|
||||
@ -655,14 +655,14 @@ const std::vector<ShapeParams> shapeParams5D_Smoke = {
|
||||
const std::vector<ShapeParams> shapeParams5D_Full = {
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES,
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {2, 7, 6, 5, 8}, {1, 11, 4, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::CONSTANT,
|
||||
{{1.f, 1.f, 1.25f, 1.5f, 0.5f}},
|
||||
defaultAxes5D.front()
|
||||
},
|
||||
ShapeParams{
|
||||
ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES,
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}}},
|
||||
InputShape{{-1, {2, 20}, -1, -1, -1}, {{1, 11, 4, 4, 4}, {1, 11, 5, 5, 8}, {1, 11, 4, 4, 4}}},
|
||||
ngraph::helpers::InputLayerType::CONSTANT,
|
||||
{{1, 11, 5, 6, 4}},
|
||||
defaultAxes5D.front()
|
||||
|
Loading…
Reference in New Issue
Block a user