[CPU] fix cloneWithUndefStridesAndOffset and cloneWithDefaultStridesAndOffset (#7542)
* [CPU] fix cloneWithUndefStridesAndOffset and cloneWithDefaultStridesAndOffset
* status fix
* applied comments after review 1
* strides compute was aligned and extracted
* review comments
parent 5166994750
commit 3bf34b1166
@@ -61,6 +61,20 @@ public:
      */
     virtual size_t getPaddedElementsCount() const = 0;

+    /**
+     * @brief Creates MemoryDesc with offsetPadding and strides of UNDEFINED_DIM size
+     *
+     * @return pointer to the new MemoryDesc
+     */
+    virtual MemoryDescPtr cloneWithUndefStridesAndOffset() const = 0;
+
+    /**
+     * @brief Creates MemoryDesc with offsetPadding of 0 size and default strides
+     *
+     * @return pointer to the new MemoryDesc
+     */
+    virtual MemoryDescPtr cloneWithDefaultStridesAndOffset() const = 0;
+
 protected:
     /**
      * @brief Check descs on compatibility
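For review context: with these pure virtuals on the descriptor interface, call sites can clone polymorphically instead of routing through the MemoryDescUtils statics removed below. A minimal sketch of the resulting pattern; the wrapper function is hypothetical (not part of the commit), and MemoryDescPtr, MemoryDescType, and BlockedMemoryDesc are the plugin types shown in this diff:

// Hypothetical helper mirroring the call sites updated later in this commit.
MemoryDescPtr makeUndefinedPortDesc(MemoryDescPtr desc) {
    if (desc->getType() & MemoryDescType::Blocked)  // only blocked descs support it
        return desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
    return desc;  // other descriptor kinds are used unchanged
}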
@@ -295,3 +295,19 @@ size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const {
         IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
     return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies<size_t>());
 }
+
+MemoryDescPtr CpuBlockedMemoryDesc::cloneWithUndefStridesAndOffset() const {
+    const auto orderSize = getOrder().size();
+    return std::make_shared<CpuBlockedMemoryDesc>(getPrecision(), getShape(), getBlockDims(), getOrder(), Shape::UNDEFINED_DIM,
+                                                  VectorDims(orderSize, 0), VectorDims(orderSize, Shape::UNDEFINED_DIM));
+}
+
+MemoryDescPtr CpuBlockedMemoryDesc::cloneWithDefaultStridesAndOffset() const {
+    return std::make_shared<CpuBlockedMemoryDesc>(getPrecision(), getShape(), getBlockDims(), getOrder());
+}
+
+MemoryDescPtr CpuBlockedMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Precision prec) const {
+    auto newDesc = std::make_shared<CpuBlockedMemoryDesc>(*this);
+    newDesc->setPrecision(prec);
+    return newDesc;
+}
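A quick sketch of the contract these two implementations establish, assuming an existing CpuBlockedMemoryDesc `desc` with a static shape (the assertions match the tests added at the end of this commit):

// Undefined clone: same precision/shape/blockDims/order, but offsetPadding and
// every stride become Shape::UNDEFINED_DIM, so the clone is no longer defined.
MemoryDescPtr undef = desc.cloneWithUndefStridesAndOffset();
assert(!undef->isDefined());

// Default clone: dense strides over the blocked dims and zero offsetPadding,
// so (for a static shape) the clone is fully defined again.
MemoryDescPtr dense = desc.cloneWithDefaultStridesAndOffset();
assert(dense->isDefined());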
@@ -78,6 +78,12 @@ public:

     size_t getPaddedElementsCount() const override;

+    MemoryDescPtr cloneWithUndefStridesAndOffset() const override;
+
+    MemoryDescPtr cloneWithDefaultStridesAndOffset() const override;
+
+    MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override;
+
 private:
     size_t getElementOffset(size_t elemNumber) const override;
     size_t getCurrentMemSizeImp() const override;
@@ -68,6 +68,8 @@ public:
         return cloneWithNewDimsImp(dims);
     }

+    virtual MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const = 0;
+
     virtual bool isCompatible(const MemoryDesc& rhs) const = 0;

     // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM)
@@ -154,7 +156,6 @@ protected:
     friend class BlobDumper;
     // WA: optimizedNspc2Ncsp used getElementOffset inside implementation
     friend class MKLDNNSplitNode;
-    friend MemoryDescPtr MemoryDescUtils::cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec);
 };

 }   // namespace MKLDNNPlugin
@@ -69,51 +69,6 @@ BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDes
     }
 }

-MemoryDescPtr MemoryDescUtils::cloneWithUndefStridesAndOffset(const MemoryDesc& desc) {
-    if (desc.getType() == MemoryDescType::Mkldnn) {
-        IE_THROW() << "Can't apply undefined offset for mkldnn memory desc";
-    }
-
-    const auto blkMemDesc = desc.as<BlockedMemoryDesc>();
-
-    VectorDims strides;
-    VectorDims offsetPaddingToData;
-    strides.resize(blkMemDesc->getBlockDims().size(), Shape::UNDEFINED_DIM);
-    offsetPaddingToData.resize(blkMemDesc->getBlockDims().size(), 0);
-    size_t offsetPadding = Shape::UNDEFINED_DIM;
-
-    if (blkMemDesc->getType() == MemoryDescType::Blocked) {
-        return std::make_shared<CpuBlockedMemoryDesc>(blkMemDesc->getPrecision(), blkMemDesc->getShape(), blkMemDesc->getBlockDims(),
-                                                      blkMemDesc->getOrder(), offsetPadding, offsetPaddingToData, strides);
-    } else if (blkMemDesc->getType() == MemoryDescType::DnnlBlocked) {
-        return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(),
-                                                                  blkMemDesc->getBlockDims(), blkMemDesc->getOrder(),
-                                                                  offsetPadding, offsetPaddingToData, strides));
-    } else {
-        IE_THROW() << "Cannot apply undefined offset. Unsupported memory desc type";
-    }
-}
-
-MemoryDescPtr MemoryDescUtils::cloneWithDefaultStridesAndOffset(const MemoryDesc& desc) {
-    const auto blkMemDesc = desc.as<BlockedMemoryDesc>();
-
-    if (MemoryDescType::Blocked == desc.getType()) {
-        return std::make_shared<CpuBlockedMemoryDesc>(blkMemDesc->getPrecision(), blkMemDesc->getShape(),
-                                                      blkMemDesc->getBlockDims(), blkMemDesc->getOrder());
-    } else if (MemoryDescType::DnnlBlocked == desc.getType()) {
-        return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(),
-                                                                  blkMemDesc->getBlockDims(), blkMemDesc->getOrder()));
-    } else {
-        IE_THROW() << "cloneWithDefaultStridesAndOffset supports Blocked descriptors only";
-    }
-}
-
-MemoryDescPtr MemoryDescUtils::cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec) {
-    MemoryDescPtr newDesc = desc.clone();
-    newDesc->setPrecision(prec);
-    return newDesc;
-}
-
 InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) {
     // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
     auto& memDesc = mem.getDesc();
@@ -56,27 +56,6 @@ public:
      */
     static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);

-    /**
-     * @brief Creates BlockedMemoryDesc with offsetPadding and strides of UNDEFINED_DIM size
-     * @param desc is the MemoryDesc to be cloned
-     * @return pointer to the new MemoryDesc
-     */
-    static std::shared_ptr<MemoryDesc> cloneWithUndefStridesAndOffset(const MemoryDesc& desc);
-
-    /**
-     * @brief Creates MemoryDesc with offsetPadding of 0 size and default strides
-     * @param desc is the MemoryDesc to be cloned
-     * @return pointer to the new MemoryDesc
-     */
-    static std::shared_ptr<MemoryDesc> cloneWithDefaultStridesAndOffset(const MemoryDesc& desc);
-
-    /**
-     * @brief Creates MemoryDesc with specified precision
-     * @param desc is the MemoryDesc to be cloned
-     * @return pointer to the new MemoryDesc
-     */
-    static std::shared_ptr<MemoryDesc> cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec);
-
     /**
      * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse
      * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob
@@ -89,6 +89,13 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
     auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());

     size_t outer_ndims = dims.size();
+
+    auto lastIter = order.begin() + outer_ndims;
+    for (size_t dim = 0; dim < outer_ndims; dim++) {
+        if (std::find(order.begin(), lastIter, dim) == lastIter)
+            IE_THROW() << "Can not construct DnnlBlockedMemoryDesc because of incorrect order: " << vec2str(order);
+    }

     size_t inner_ndims = order.size() - dims.size();

     if (!strides.empty()) {
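A self-contained sketch of the invariant the new validation loop enforces; the function name and example values below are illustrative only:

#include <algorithm>
#include <cstddef>
#include <vector>

// The first outer_ndims entries of `order` must contain every outer dimension
// index, i.e. form a permutation of 0..outer_ndims-1; trailing entries describe
// inner blocks and may repeat outer indices.
bool isValidOrder(const std::vector<size_t>& order, size_t outer_ndims) {
    auto lastIter = order.begin() + outer_ndims;
    for (size_t dim = 0; dim < outer_ndims; dim++) {
        if (std::find(order.begin(), lastIter, dim) == lastIter)
            return false;  // some outer dim never appears among the outer entries
    }
    return true;
}
// isValidOrder({0, 1, 2, 3, 1}, 4) -> true   (nChw8c-style order)
// isValidOrder({0, 2, 2, 1}, 4)    -> false  (dimension 3 is missing)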
@@ -103,16 +110,6 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
             IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides);
     }

-    VectorDims outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension
-    for (size_t i = 0; i < outer_ndims; i++) {
-        outer_order[order[i]] = i;
-    }
-    bool outer_is_correct_permutation_of_n =
-            std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end();
-
-    if (!outer_is_correct_permutation_of_n)
-        IE_THROW() << "Can not construct DnnlBlockedMemoryDesc because of incorrect order: " << vec2str(order);
-
     if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
         bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted
         for (int i = outer_ndims; i < strides.size() - 1; i++) {
@@ -161,30 +158,19 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
     std::copy(dnnlBlkDims.end() - inner_ndims, dnnlBlkDims.end(), dnn_blk_desc.inner_blks);
     std::copy(order.end() - inner_ndims, order.end(), dnn_blk_desc.inner_idxs);

+    this->order = order;
+    initBlockDims();
+    initOffsetPadding();
+
     if (strides.empty()) {
-        if (std::any_of(dnnlBlkDims.begin(), dnnlBlkDims.end(), [](memory::dim val) { return val == DNNL_RUNTIME_DIM_VAL; })) {
-            std::fill(std::begin(dnn_blk_desc.strides), std::begin(dnn_blk_desc.strides) + outer_ndims, DNNL_RUNTIME_DIM_VAL);
-        } else {
-            //TODO [DS]: phase 2: refactor
-            std::vector<memory::dim> tmpStrides(order.size());
-            tmpStrides[order.size() - 1] = 1;
-            for (size_t i = 2; i <= order.size(); i++) {
-                tmpStrides[order.size() - i] = tmpStrides[order.size() - (i - 1)] * dnnlBlkDims[blockedDims.size() - (i - 1)];
-            }
-            for (size_t i = 0; i < outer_ndims; i++) {
-                dnn_blk_desc.strides[i] = tmpStrides[outer_order[i]];
-            }
-        }
+        this->recomputeDefaultStrides();
     } else {
         for (size_t i = 0; i < outer_ndims; i++) {
             auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides);
-            dnn_blk_desc.strides[i] = dnnlStrides[outer_order[i]];
+            dnn_blk_desc.strides[order[i]] = dnnlStrides[i];
         }
+        initStrides();
     }

-    this->order = order;
-
     initBlockedParams();
 }

 DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) :
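One subtlety worth noting: dropping `outer_order` here is behavior-preserving, because scattering through the forward permutation (`strides[order[i]] = dnnlStrides[i]`) places each value exactly where the old inverse-permutation gather (`strides[i] = dnnlStrides[outer_order[i]]`) read it from, given that the removed block computed `outer_order` as the inverse of `order`. A standalone check with made-up numbers:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    std::vector<size_t> order{0, 2, 3, 1};       // nhwc-like outer order
    std::vector<long> dnnlStrides{60, 1, 15, 5}; // illustrative values

    // Old scheme: build the inverse permutation, then gather through it.
    std::vector<size_t> outer_order(order.size());
    for (size_t i = 0; i < order.size(); i++)
        outer_order[order[i]] = i;
    std::vector<long> oldStrides(order.size());
    for (size_t i = 0; i < order.size(); i++)
        oldStrides[i] = dnnlStrides[outer_order[i]];

    // New scheme: scatter through the forward permutation directly.
    std::vector<long> newStrides(order.size());
    for (size_t i = 0; i < order.size(); i++)
        newStrides[order[i]] = dnnlStrides[i];

    assert(oldStrides == newStrides);  // identical placement, no inverse needed
    return 0;
}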
@@ -272,12 +258,6 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc)
     const size_t inner_ndims = blk_desc.inner_nblks;
     const size_t total_ndims = outer_ndims + inner_ndims;

-    // strides of inner dims. In case of 4i16o4i will be {64, 4, 1}
-    VectorDims inner_strides(inner_ndims, 1);
-    for (size_t i = 1; i < blk_desc.inner_nblks; i++) {
-        inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i];
-    }
-
     // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1}
     VectorDims total_block_per_dim(outer_ndims, 1);
     for (int i = 0; i < inner_ndims; i++) {
@@ -794,3 +774,49 @@ void DnnlBlockedMemoryDesc::initStrides() {
 void DnnlBlockedMemoryDesc::initOffsetPadding() {
     offsetPaddingToData = VectorDims(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + getOrder().size());
 }
+
+MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithUndefStridesAndOffset() const {
+    DnnlBlockedMemoryDescPtr newDesc = std::make_shared<DnnlBlockedMemoryDesc>(*this);
+    auto &dnnlBlkDesc = newDesc->desc.data.format_desc.blocking;
+    std::fill(std::begin(dnnlBlkDesc.strides), std::begin(dnnlBlkDesc.strides) + getShape().getRank(), DNNL_RUNTIME_DIM_VAL);
+    newDesc->initStrides();
+    newDesc->desc.data.offset0 = DNNL_RUNTIME_DIM_VAL;
+    newDesc->status = descStatus::Undefined;
+    return newDesc;
+}
+
+MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithDefaultStridesAndOffset() const {
+    DnnlBlockedMemoryDescPtr newDesc = std::make_shared<DnnlBlockedMemoryDesc>(*this);
+    newDesc->recomputeDefaultStrides();
+    newDesc->desc.data.offset0 = 0;
+    newDesc->status = descStatus::Unknown;
+    return newDesc;
+}
+
+MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Precision prec) const {
+    auto newDesc = std::make_shared<DnnlBlockedMemoryDesc>(*this);
+    newDesc->setPrecision(prec);
+    return newDesc;
+}
+
+void DnnlBlockedMemoryDesc::recomputeDefaultStrides() {
+    const auto &rank = getShape().getRank();
+
+    if (order.size() != blockedDims.size())
+        IE_THROW() << "Can't recompute stride: order size != blocked dims size";
+
+    auto &oneDnnStrides = desc.data.format_desc.blocking.strides;
+    if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
+        std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + rank, DNNL_RUNTIME_DIM_VAL);
+        initStrides();
+    } else {
+        strides.resize(order.size());
+        strides[order.size() - 1] = 1;
+        for (size_t i = 2; i <= order.size(); i++) {
+            strides[order.size() - i] = strides[order.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)];
+        }
+        for (size_t i = 0; i < rank; i++) {
+            oneDnnStrides[order[i]] = strides[i];
+        }
+    }
+}
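For intuition, the dense-stride recursion that the extracted `recomputeDefaultStrides` (and the CPU default clone) relies on, as a standalone sketch; the example dims are illustrative and the function assumes a non-empty dims vector:

#include <cstddef>
#include <vector>

// Innermost blocked dim gets stride 1; each outer stride is the next inner
// stride multiplied by the next inner blocked dim (dense layout, no padding).
std::vector<size_t> defaultStrides(const std::vector<size_t>& blockedDims) {
    std::vector<size_t> strides(blockedDims.size());
    strides[blockedDims.size() - 1] = 1;
    for (size_t i = 2; i <= blockedDims.size(); i++) {
        strides[blockedDims.size() - i] =
                strides[blockedDims.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)];
    }
    return strides;
}
// e.g. an nChw8c-style blocked shape {2, 1, 4, 5, 8} -> strides {160, 160, 40, 8, 1}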
@@ -57,10 +57,16 @@ public:

     size_t getPaddedElementsCount() const override;

+    MemoryDescPtr cloneWithUndefStridesAndOffset() const override;
+
+    MemoryDescPtr cloneWithDefaultStridesAndOffset() const override;
+
+    MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override;
+
 private:
     DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims,
                           const VectorDims& order, size_t offsetPadding = 0, const VectorDims& offsetPaddingToData = {},
                           const VectorDims& strides = {});

     DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc);
@@ -82,6 +88,8 @@ private:
     void initStrides();
     void initOffsetPadding();

+    void recomputeDefaultStrides();
+
     /**
      * Try to define original format tag use on creation
      *
@@ -72,4 +72,10 @@ size_t DnnlMemoryDesc::getMaxMemSize() const {
     return getCurrentMemSize();
 }

+MemoryDescPtr DnnlMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Precision prec) const {
+    auto newDesc = std::make_shared<DnnlMemoryDesc>(*this);
+    newDesc->setPrecision(prec);
+    return newDesc;
+}
+
 }   // namespace MKLDNNPlugin
@@ -46,6 +46,8 @@ public:

     bool hasEmptyExtraData() const { return desc.data.extra.flags == dnnl_memory_extra_flag_none; }

+    MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override;
+
 protected:
     DnnlMemoryDesc() {}
     static constexpr size_t UNREACHABLE_DIM = std::numeric_limits<size_t>::max();
@@ -1772,7 +1772,7 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) {
     auto outPrec = outDesc->getPrecision();

     auto reorderInDesc = inDesc;
-    auto reorderOutDesc = MemoryDescUtils::cloneWithNewPrecision(*outDesc, inPrec);
+    auto reorderOutDesc = outDesc->cloneWithNewPrecision(inPrec);

     std::string reorderlayerName = parentParentNode->getName() + "_" +
             MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake";
@@ -547,7 +547,7 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
             portConfig.constant = false;
             auto desc = getSrcMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked) {
-                portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                portConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 portConfig.desc = std::move(desc);
             }
@@ -560,7 +560,7 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
             portConfig.constant = false;
             auto desc = getDstMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked) {
-                portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                portConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 portConfig.desc = std::move(desc);
             }
@@ -870,7 +870,7 @@ MemoryDescPtr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t i

     if (num >= 0) {
         auto parentConf = selectedPD->getConfig().outConfs[num];
-        parentConf.desc = MemoryDescUtils::cloneWithNewPrecision(*parentConf.desc, config.inConfs[idx].desc->getPrecision());
+        parentConf.desc = parentConf.desc->cloneWithNewPrecision(config.inConfs[idx].desc->getPrecision());
         if (!parentConf.desc->isDefined() && parentConf.inPlace >= 0)
             getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor();
         parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num];
@@ -879,7 +879,7 @@ MemoryDescPtr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t i
         }
     }

-    return MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.inConfs[idx].desc);
+    return config.inConfs[idx].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
 }

 MemoryDescPtr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const {
@@ -898,7 +898,7 @@ MemoryDescPtr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t

     if (num >= 0) {
         auto childConf = selectedPD->getConfig().inConfs[num];
-        childConf.desc = MemoryDescUtils::cloneWithNewPrecision(*childConf.desc, config.outConfs[idx].desc->getPrecision());
+        childConf.desc = childConf.desc->cloneWithNewPrecision(config.outConfs[idx].desc->getPrecision());
         if (!childConf.desc->isDefined() && childConf.inPlace >= 0)
             getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor();
         childConf = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num];
@@ -907,7 +907,7 @@ MemoryDescPtr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t
         }
     }

-    return MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.outConfs[idx].desc);
+    return config.outConfs[idx].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
 }

 void MKLDNNNode::initOptimalPrimitiveDescriptor() {
@@ -153,7 +153,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
         for (size_t i = 0; i < getParentEdges().size(); ++i) {
             config.inConfs[i].inPlace = -1;
             config.inConfs[i].constant = false;
-            config.inConfs[i].desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(itr->second->createDesc(inputPrecision, getInputShapeAtPort(i)));
+            config.inConfs[i].desc = itr->second->createDesc(inputPrecision, getInputShapeAtPort(i)).cloneWithUndefStridesAndOffset();
         }
         supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);
         if (itr->first != LayoutType::nspc) {
@@ -396,11 +396,11 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
     if (!isConfigDefined(config)) {
         for (size_t i = 0; i < config.inConfs.size(); i++) {
             // Concat doesn't support different precision on inputs
-            config.inConfs[i].desc = MemoryDescUtils::cloneWithNewPrecision(*getDefinedInputDesc(config, i), inputPrecision);
+            config.inConfs[i].desc = getDefinedInputDesc(config, i)->cloneWithNewPrecision(inputPrecision);
         }

         for (size_t i = 0; i < config.outConfs.size(); i++) {
-            config.outConfs[i].desc = MemoryDescUtils::cloneWithNewPrecision(*getDefinedOutputDesc(config, i), outputPrecision);
+            config.outConfs[i].desc = getDefinedOutputDesc(config, i)->cloneWithNewPrecision(outputPrecision);
         }

         initDescriptor(config);
@@ -418,7 +418,7 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
         int num = getChildEdgeAt(i)->getOutputNum();
         if (num >= 0) {
             auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num];
-            childConf.desc = MemoryDescUtils::cloneWithNewPrecision(*childConf.desc, config.outConfs[i].desc->getPrecision());
+            childConf.desc = childConf.desc->cloneWithNewPrecision(config.outConfs[i].desc->getPrecision());

             if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) {
                 if (!childConf.desc->isDefined() && childConf.inPlace >= 0)
@@ -432,7 +432,7 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
         }

         // reset undefined offsets
-        config.outConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.outConfs[i].desc);
+        config.outConfs[i].desc = config.outConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
     }
     auto firstOutBlockingDesc = config.outConfs[0].desc->as<BlockedMemoryDesc>();
     size_t offset = 0;
@@ -402,7 +402,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
             dataConfig.constant = false;
             auto desc = getSrcMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked && !isGrouped) {
-                dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                dataConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 dataConfig.desc = std::move(desc);
             }
@@ -436,7 +436,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
             dataConfig.constant = false;
             auto desc = getDstMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked && !isGrouped) {
-                dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                dataConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 dataConfig.desc = std::move(desc);
             }
@@ -445,7 +445,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {

             if (withSum) {
                 dataConfig.inPlace = -1;
-                dataConfig.desc = MemoryDescUtils::cloneWithNewPrecision(*dataConfig.desc, dataConfig.desc->getPrecision());
+                dataConfig.desc = dataConfig.desc->cloneWithNewPrecision(dataConfig.desc->getPrecision());
                 config.inConfs.push_back(dataConfig);
             }
         }
@@ -614,7 +614,7 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
             dataConfig.desc = getDstMemDesc(itpd, j);
             if (withSum) {
                 auto eltwiseConfig = dataConfig;
-                eltwiseConfig.desc = MemoryDescUtils::cloneWithNewPrecision(*eltwiseConfig.desc, eltwisePrecision);
+                eltwiseConfig.desc = eltwiseConfig.desc->cloneWithNewPrecision(eltwisePrecision);
                 cfg.inConfs.push_back(eltwiseConfig);
                 dataConfig.inPlace = getParentEdges().size() - 1;
             }
@@ -97,7 +97,7 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {

         // inp/out layouts must be the same
         dataConfigOut.desc = config.inConfs[0].desc;
-        dataConfigOut.desc = MemoryDescUtils::cloneWithNewPrecision(*dataConfigOut.desc, output->getPrecision());
+        dataConfigOut.desc = dataConfigOut.desc->cloneWithNewPrecision(output->getPrecision());
         config.outConfs.push_back(dataConfigOut);
         supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
     } else if (inputShapes.size() == 1 && outputShapes.size() == 1) {
@@ -274,7 +274,7 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() {
             dataConfig.constant = false;
             auto desc = getSrcMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked) {
-                dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                dataConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 dataConfig.desc = std::move(desc);
             }
@@ -287,7 +287,7 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() {
             dataConfig.constant = false;
             auto desc = getDstMemDesc(itpd, i);
             if (desc->getType() & MemoryDescType::Blocked) {
-                dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc);
+                dataConfig.desc = desc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();
             } else {
                 dataConfig.desc = std::move(desc);
             }
@@ -346,7 +346,7 @@ void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemor
                                           outPrc, input.GetSize() / input.getDesc().getPrecision().size());

             MKLDNNMemory tmpMem(output.getEngine());
-            auto tmpDesc = MemoryDescUtils::cloneWithNewPrecision(input.getDesc(), outPrc);
+            auto tmpDesc = input.getDesc().cloneWithNewPrecision(outPrc);
             tmpMem.Create(std::move(tmpDesc), tmpBuff.data());

             pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive()));
@@ -333,7 +333,7 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
         }

         // reset undefined offsets
-        config.inConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.inConfs[i].desc);
+        config.inConfs[i].desc = config.inConfs[i].desc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();
     }
     if (config.outConfs.size() != outputShapes.size())
         THROW_ERROR << "has invalid config";
@@ -98,7 +98,7 @@ TEST(MemDescTest, TurnToUninit) {
     const MemoryDescPtr blockedDesc = creator->createSharedDesc(Precision::FP32, cpuShape);
     auto mkldnnDesc = MemoryDescUtils::convertToDnnlMemoryDesc(blockedDesc);

-    auto uninitMkldnnDesc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*mkldnnDesc);
+    auto uninitMkldnnDesc = mkldnnDesc->as<BlockedMemoryDesc>()->cloneWithUndefStridesAndOffset();

     ASSERT_TRUE(uninitMkldnnDesc->isCompatible(*mkldnnDesc));
@@ -113,7 +113,7 @@ TEST(MemDescTest, TurnToUninit) {
     ASSERT_FALSE(blockedDesc->isCompatible(stridedBlockedDesc));
     ASSERT_TRUE(uninitMkldnnDesc->isCompatible(stridedBlockedDesc));

-    auto initMkldnnDesc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(*uninitMkldnnDesc);
+    auto initMkldnnDesc = uninitMkldnnDesc->as<BlockedMemoryDesc>()->cloneWithDefaultStridesAndOffset();

     ASSERT_TRUE(initMkldnnDesc->isCompatible(*blockedDesc));
     ASSERT_FALSE(initMkldnnDesc->isCompatible(stridedBlockedDesc));
@@ -356,3 +356,79 @@ TEST(isSameMethodTest, CheckTensorWithSameStrides) {
     for (const auto &tc : testCases)
         ASSERT_TRUE(isSameDataFormat(tc.first, tc.second));
 }
+
+TEST(cloneWithParamsChange, UndefinedAndDefaultParams) {
+    dnnl::memory::format_tag testCases[] {
+        dnnl::memory::format_tag::nchw,
+        dnnl::memory::format_tag::nhwc,
+        dnnl::memory::format_tag::nChw8c,
+        dnnl::memory::format_tag::nChw16c
+    };
+
+    // DnnlBlockedMemoryDesc with extra
+    auto cloneWithParamsChangeDnnl = [](dnnl::memory::format_tag fmt) {
+        dnnl::memory::desc refOneDnnDesc(dnnl::memory::dims{2, 3, 4, 5}, mkldnn::memory::data_type::u8, fmt);
+        refOneDnnDesc.data.extra.flags = dnnl_memory_extra_flag_compensation_conv_s8s8;
+        refOneDnnDesc.data.extra.compensation_mask = 1;
+        refOneDnnDesc.data.extra.scale_adjust = 2.0f;
+        auto refDesc = MKLDNNExtensionUtils::makeDescriptor(refOneDnnDesc);
+        auto refDnnlBlkDesc = refDesc->as<DnnlBlockedMemoryDesc>();
+
+        auto undefDesc = refDnnlBlkDesc->cloneWithUndefStridesAndOffset();
+        auto undefDnnlBlkDesc = undefDesc->as<DnnlBlockedMemoryDesc>();
+        ASSERT_EQ(refDnnlBlkDesc->getBlockDims(), undefDnnlBlkDesc->getBlockDims());
+        ASSERT_EQ(refDnnlBlkDesc->getOrder(), undefDnnlBlkDesc->getOrder());
+        ASSERT_EQ(refDnnlBlkDesc->getOffsetPaddingToData(), undefDnnlBlkDesc->getOffsetPaddingToData());
+        // undef
+        ASSERT_EQ(Shape::UNDEFINED_DIM, undefDnnlBlkDesc->getOffsetPadding());
+        auto undefStrides = refDnnlBlkDesc->getStrides();
+        std::fill(undefStrides.begin(), undefStrides.begin() + refDnnlBlkDesc->getShape().getRank(), Shape::UNDEFINED_DIM);
+        ASSERT_EQ(undefStrides, undefDnnlBlkDesc->getStrides());
+        ASSERT_FALSE(undefDnnlBlkDesc->isDefined());
+
+        auto definedDesc = undefDnnlBlkDesc->cloneWithDefaultStridesAndOffset();
+        auto definedDnnlBlkDesc = definedDesc->as<DnnlBlockedMemoryDesc>();
+        ASSERT_TRUE(refOneDnnDesc == definedDnnlBlkDesc->as<DnnlMemoryDesc>()->getDnnlDesc());
+        ASSERT_EQ(refDnnlBlkDesc->getBlockDims(), definedDnnlBlkDesc->getBlockDims());
+        ASSERT_EQ(refDnnlBlkDesc->getOrder(), definedDnnlBlkDesc->getOrder());
+        ASSERT_EQ(refDnnlBlkDesc->getOffsetPaddingToData(), definedDnnlBlkDesc->getOffsetPaddingToData());
+        ASSERT_EQ(refDnnlBlkDesc->getOffsetPadding(), definedDnnlBlkDesc->getOffsetPadding());
+        ASSERT_EQ(refDnnlBlkDesc->getStrides(), definedDnnlBlkDesc->getStrides());
+        ASSERT_TRUE(refDnnlBlkDesc->isDefined());
+    };
+
+    for (const auto &tc : testCases) {
+        cloneWithParamsChangeDnnl(tc);
+    }
+
+    // CpuBlockedMemoryDesc
+    auto cloneWithParamsChangeCpu = [](dnnl::memory::format_tag fmt) {
+        dnnl::memory::desc refOneDnnDesc(dnnl::memory::dims{2, 3, 4, 5}, mkldnn::memory::data_type::u8, fmt);
+        auto refDesc = MemoryDescUtils::convertToBlockedMemoryDesc(MKLDNNExtensionUtils::makeDescriptor(refOneDnnDesc));
+
+        auto undefDesc = refDesc->cloneWithUndefStridesAndOffset();
+        auto undefCpuBlkDesc = undefDesc->as<BlockedMemoryDesc>();
+        ASSERT_EQ(refDesc->getBlockDims(), undefCpuBlkDesc->getBlockDims());
+        ASSERT_EQ(refDesc->getOrder(), undefCpuBlkDesc->getOrder());
+        ASSERT_EQ(refDesc->getOffsetPaddingToData(), undefCpuBlkDesc->getOffsetPaddingToData());
+        // undef
+        ASSERT_EQ(Shape::UNDEFINED_DIM, undefCpuBlkDesc->getOffsetPadding());
+        auto undefStrides = refDesc->getStrides();
+        std::fill(undefStrides.begin(), undefStrides.begin() + refDesc->getShape().getRank(), Shape::UNDEFINED_DIM);
+        ASSERT_EQ(undefStrides, undefCpuBlkDesc->getStrides());
+        ASSERT_FALSE(undefCpuBlkDesc->isDefined());
+
+        auto definedDesc = undefCpuBlkDesc->cloneWithDefaultStridesAndOffset();
+        auto definedDnnlBlkDesc = definedDesc->as<BlockedMemoryDesc>();
+        ASSERT_EQ(refDesc->getBlockDims(), definedDnnlBlkDesc->getBlockDims());
+        ASSERT_EQ(refDesc->getOrder(), definedDnnlBlkDesc->getOrder());
+        ASSERT_EQ(refDesc->getOffsetPaddingToData(), definedDnnlBlkDesc->getOffsetPaddingToData());
+        ASSERT_EQ(refDesc->getOffsetPadding(), definedDnnlBlkDesc->getOffsetPadding());
+        ASSERT_EQ(refDesc->getStrides(), definedDnnlBlkDesc->getStrides());
+        ASSERT_TRUE(definedDnnlBlkDesc->isDefined());
+    };
+
+    for (const auto &tc : testCases) {
+        cloneWithParamsChangeCpu(tc);
+    }
+}