[CPU] StringMemory for the output tensor in the InferRequest (#21746)

This commit is contained in:
Nikolay Shchegolev
2023-12-19 15:20:15 +04:00
committed by GitHub
parent 6060683f4c
commit 6246fb0c23
2 changed files with 52 additions and 19 deletions

View File

@@ -60,6 +60,9 @@ Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bo
m_pMemDesc(desc),
m_mgrHandle(std::make_shared<DnnlMemoryMngr>(make_unique<MemoryMngrWithReuse>()), this),
dnnlMemHandle(this) {
if (desc->getPrecision() == element::string) {
OPENVINO_THROW("[CPU] Memory object cannot be created for string data.");
}
create(m_pMemDesc, data, pads_zeroing);
}
@@ -68,6 +71,9 @@ Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data
Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) :
m_eng(eng), m_pMemDesc(desc), m_mgrHandle(mngr, this), dnnlMemHandle(this) {
if (desc->getPrecision() == element::string) {
OPENVINO_THROW("[CPU] Memory object can't be created for string data.");
}
bool memAllocated = m_mgrHandle->getRawPtr();
create(desc, nullptr, !memAllocated);
@@ -105,6 +111,9 @@ void Memory::create(MemoryDescPtr desc, const void* data, bool pads_zeroing) {
}
// Loads `src` into this Memory object by forwarding to transferData().
// A source whose descriptor reports element::string precision is rejected
// through OPENVINO_THROW before any transfer is attempted.
// NOTE(review): `ftz` is passed through unchanged; presumably a flush-to-zero switch — confirm against transferData().
void Memory::load(const IMemory& src, bool ftz) const {
    const bool srcHoldsStrings = (src.getDesc().getPrecision() == element::string);
    if (!srcHoldsStrings) {
        transferData(src, *this, ftz);
        return;
    }
    OPENVINO_THROW("[CPU] Memory object cannot load string data.");
}
@@ -115,6 +124,9 @@ void Memory::nullify() {
}
void Memory::redefineDesc(MemoryDescPtr desc) {
if (desc->getPrecision() == element::string) {
OPENVINO_THROW("[CPU] Memory object cannot accept a descriptor with a string type.");
}
if (!desc->hasDefinedMaxSize()) {
OPENVINO_THROW("Can not reset descriptor, memory upper bound is unknown.");
}
@@ -445,6 +457,9 @@ void DnnlMemoryMngr::notifyUpdate() {
StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) :
m_eng(eng), m_pMemDesc(desc) {
if (desc->getPrecision() == element::string) {
OPENVINO_THROW("[CPU] StaticMemory object cannot be created for string data.");
}
if (!m_pMemDesc->isDefined()) {
OPENVINO_THROW("Can not create StaticMemory object. The memory desc is undefined");
}
@@ -511,6 +526,9 @@ void StaticMemory::redefineDesc(MemoryDescPtr desc) {
}
// Loads `src` into this StaticMemory object by forwarding to transferData().
// Sources with element::string precision are refused via OPENVINO_THROW.
// NOTE(review): `ftz` is handed to transferData() as-is; its exact meaning is defined there — confirm.
void StaticMemory::load(const IMemory& src, bool ftz) const {
    const auto srcPrecision = src.getDesc().getPrecision();
    const bool isStringSource = (srcPrecision == element::string);
    if (isStringSource) {
        OPENVINO_THROW("[CPU] StaticMemory cannot load string data.");
    }

    transferData(src, *this, ftz);
}

View File

@@ -175,14 +175,15 @@ std::vector<ov::ProfilingInfo> SyncInferRequest::get_profiling_info() const {
}
static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr<ov::ITensor>& tensor) {
auto& mem = edge->getMemory();
auto mem = edge->getMemoryPtr();
OPENVINO_ASSERT(mem != nullptr, "Edge with name '", edge->name(), "' doesn't have allocated memory object.");
if (tensor->get_element_type() == element::string) {
auto memMngr = dynamic_cast<const StringMemory &>(mem).getStringMemoryMngrPtr();
auto memMngr = dynamic_cast<StringMemory *>(mem.get())->getStringMemoryMngrPtr();
OPENVINO_ASSERT(memMngr);
memMngr->setExtBuff(tensor->data<OvString>(), tensor->get_size());
memMngr->setExtBuff(tensor->data<StringMemory::OvString>(), tensor->get_size());
} else {
auto memMngr = mem.getMemoryMngr();
auto memMngr = mem->getMemoryMngr();
OPENVINO_ASSERT(memMngr);
memMngr->setExtBuff(tensor->data(), tensor->get_byte_size());
}
@@ -546,26 +547,40 @@ void SyncInferRequest::init_tensor(const std::string& name) {
if (!tensor) {
ov::Shape tensor_shape;
const auto model_prec = port.get_element_type();
if (isDynamic) {
const auto model_prec = port.get_element_type();
const auto graph_prec =
output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision();
OutputControlBlock control_block{model_prec, Shape{shape}};
if (model_prec == element::string) {
VectorDims memDims;
auto c_shape = Shape{shape};
for (auto&& dim : c_shape.getDims()) {
memDims.push_back(dim != Shape::UNDEFINED_DIM ? dim : 0);
}
DEBUG_LOG(name,
", tensor ",
control_block.tensor(),
", memmngr ",
control_block.tensor()->get_memory()->getMemoryMngr(),
"memory object ",
control_block.tensor()->get_memory().get());
dnnl::engine eng(dnnl::engine::kind::cpu, 0);
CpuBlockedMemoryDescPtr desc = std::make_shared<CpuBlockedMemoryDesc>(model_prec, Shape{memDims});
auto memory = std::make_shared<StringMemory>(eng, desc);
tensor = control_block.tensor();
if (model_prec == graph_prec)
m_outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
tensor = std::make_shared<Tensor>(memory);
} else {
const auto graph_prec =
output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision();
OutputControlBlock control_block{model_prec, Shape{shape}};
DEBUG_LOG(name,
", tensor ",
control_block.tensor(),
", memmngr ",
control_block.tensor()->get_memory()->getMemoryMngr(),
"memory object ",
control_block.tensor()->get_memory().get());
tensor = control_block.tensor();
if (model_prec == graph_prec)
m_outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
}
} else {
tensor_shape = shape.to_shape();
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
tensor = ov::make_tensor(model_prec, tensor_shape);
}
ov::ISyncInferRequest::set_tensor(port, tensor);
} else {