[CPU] Ensure const blob 16 byte alignment on non-AVX platforms (#18705)

This commit is contained in:
Luwei Zhou 2023-07-28 13:01:36 +08:00 committed by GitHub
parent aba2770921
commit c9001980ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 16 deletions

View File

@ -112,21 +112,6 @@ bool Edge::enforceReorder() {
}
}
// If the parent node is a constant input whose memory is unaligned and the child primitive uses the SSE isa,
// we have to insert a reorder, since the vast majority of arithmetic and data processing instructions in the
// legacy SSE isa require their memory operands to be aligned on a 16-byte boundary.
if ((childSPD->getImplementationType() & impl_desc_type::sse42) &&
Type::Input == parentNode->getType() &&
parentNode->isConstant()) {
if (auto pInputNode = std::dynamic_pointer_cast<node::Input>(parentNode)) {
auto rawMemPtr = pInputNode->getMemoryPtr()->getData();
bool isAligned = (reinterpret_cast<uintptr_t>(rawMemPtr) & 15) == 0;
if (!isAligned) {
return true;
}
}
}
return false;
}

View File

@ -287,7 +287,15 @@ void Input::cloneBlobIfRequired() {
// Reports whether the constant's data pointer passes both alignment checks below:
//   1. it is a multiple of the element size (prec.size()) when elements are wider than one byte;
//   2. on x86 / x86-64 builds, either avx2 is usable or the address is 16-byte aligned.
// Returns true only when both checks pass.
auto isBlobAligned = [&, this] () {
    const void *ptr = constOp->get_data_ptr();
    bool blobAlignedOnSSE = true;
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
    // The majority of arithmetic and data processing instructions in the legacy SSE isa require
    // their memory operands to be aligned on a 16-byte boundary. To safely reuse the ngraph
    // const blob memory, the address alignment has to be checked.
    blobAlignedOnSSE = mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast<uintptr_t>(ptr) & 15) == 0);
#endif
    // NOTE: the previous unconditional early return of the precision-only check was dropped here;
    // it made the SSE alignment check above unreachable.
    const bool blobAlignedWithPrec = prec.size() > 1 ? (reinterpret_cast<size_t>(ptr) % prec.size()) == 0 : true;
    return blobAlignedWithPrec && blobAlignedOnSSE;
};
// The presence of subnormals is better determined at IR read time.
@ -363,6 +371,7 @@ void Input::cloneBlobIfRequired() {
};
auto weightCache = context->getWeightsCache();
if (weightCache) {
MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
memoryPtr = std::const_pointer_cast<const IMemory>(ptr);