[CPU] Ensure const blob 16 byte alignment on non-AVX platforms (#18705)
This commit is contained in:
parent
aba2770921
commit
c9001980ef
@ -112,21 +112,6 @@ bool Edge::enforceReorder() {
|
||||
}
|
||||
}
|
||||
|
||||
// In case the parent node is an input constant, the memory is unaligned and the child primitive isa is SSE,
|
||||
// we have to insert a reorder, since the vast majority of arithmetic and data processing instructions in the legacy SSE ISA require
|
||||
// the memory address in the operands to be aligned on a 16-byte boundary.
|
||||
if ((childSPD->getImplementationType() & impl_desc_type::sse42) &&
|
||||
Type::Input == parentNode->getType() &&
|
||||
parentNode->isConstant()) {
|
||||
if (auto pInputNode = std::dynamic_pointer_cast<node::Input>(parentNode)) {
|
||||
auto rawMemPtr = pInputNode->getMemoryPtr()->getData();
|
||||
bool isAligned = (reinterpret_cast<uintptr_t>(rawMemPtr) & 15) == 0;
|
||||
if (!isAligned) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -287,7 +287,15 @@ void Input::cloneBlobIfRequired() {
|
||||
|
||||
auto isBlobAligned = [&, this] () {
|
||||
const void *ptr = constOp->get_data_ptr();
|
||||
return prec.size() > 1 ? (reinterpret_cast<size_t>(ptr) % prec.size()) == 0 : true;
|
||||
bool blobAlignedOnSSE = true;
|
||||
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
|
||||
// The majority of arithmetic and data processing instructions in the legacy SSE ISA require
|
||||
// the memory address in the operands to be aligned on a 16-byte boundary. To safely
|
||||
|
||||
// reuse the ngraph const blob memory, the address alignment needs to be checked.
|
||||
blobAlignedOnSSE = mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast<uintptr_t>(ptr) & 15) == 0);
|
||||
#endif
|
||||
const bool blobAlignedWithPrec = prec.size() > 1 ? (reinterpret_cast<size_t>(ptr) % prec.size()) == 0 : true;
|
||||
return blobAlignedWithPrec && blobAlignedOnSSE;
|
||||
};
|
||||
|
||||
// The presence of subnormals is better determined at IR read time.
|
||||
@ -363,6 +371,7 @@ void Input::cloneBlobIfRequired() {
|
||||
};
|
||||
|
||||
auto weightCache = context->getWeightsCache();
|
||||
|
||||
if (weightCache) {
|
||||
MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
|
||||
memoryPtr = std::const_pointer_cast<const IMemory>(ptr);
|
||||
|
Loading…
Reference in New Issue
Block a user