[CPU] Enable String data type (#21465)

Nikolay Shchegolev 2023-12-15 15:01:39 +04:00 committed by GitHub
parent 948b7405c7
commit e00508368d
15 changed files with 640 additions and 141 deletions

View File

@@ -2,24 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <oneapi/dnnl/dnnl.hpp>
#include <vector>
#include <numeric>
#include <unordered_set>
#include <dnnl_types.h>
#include <common/memory_desc_wrapper.hpp>
#include "cpu_memory.h"
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/cpu_convert.h"
#include "onednn/dnnl.h"
#include "cpu_shape.h"
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "nodes/reorder.h"
#include "memory_desc/cpu_memory_desc.h"
using namespace InferenceEngine;
using namespace dnnl;
namespace ov {
namespace intel_cpu {
@@ -169,7 +153,7 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const {
// Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
// but with ability to skip pads zeroing.
auto desc = MemoryDescUtils::convertToDnnlMemoryDesc(m_memObjPtr->getDescPtr());
m_prim = memory(desc->getDnnlDesc(), m_memObjPtr->getEngine(), DNNL_MEMORY_NONE);
m_prim = dnnl::memory(desc->getDnnlDesc(), m_memObjPtr->getEngine(), DNNL_MEMORY_NONE);
//
// ========================
auto data = m_memObjPtr->getDataNoThrow();
@@ -288,6 +272,129 @@ void MemoryMngrRealloc::destroy(void *ptr) {
dnnl::impl::free(ptr);
}
/////////////// StringMemory ///////////////
StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data) : m_engine(engine), m_mem_desc(desc) {
if (m_mem_desc->getPrecision() != element::string) {
OPENVINO_THROW("[CPU] StringMemory supports String type only.");
}
m_manager = std::make_shared<StringMemoryMngr>();
if (!m_mem_desc->isDefined()) {
return;
}
m_size = m_mem_desc->getCurrentMemSize();
const auto string_size = m_mem_desc->getShape().getElementsCount();
if (data != nullptr) {
auto not_const_data = const_cast<void *>(data);
m_manager->setExtBuff(reinterpret_cast<OvString *>(not_const_data), string_size);
} else {
m_manager->resize(string_size);
}
}
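A minimal usage sketch of this constructor (hypothetical names: a valid engine and a defined MemoryDescPtr desc are assumed to exist):

    std::vector<StringMemory::OvString> buf(desc->getShape().getElementsCount());
    StringMemory wrapped(engine, desc, buf.data()); // adopts the external buffer; the no-op deleter leaves ownership with the caller
    StringMemory owned(engine, desc);               // allocates and owns the strings itself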
void StringMemory::load(const IMemory& src, bool ftz) const {
if (src.getDesc().getPrecision() != element::string) {
OPENVINO_THROW("[CPU] String memory cannot load a non-string object.");
}
transferData(src, *this, false);
}
void* StringMemory::getData() const {
return m_manager->getRawPtr();
}
void StringMemory::redefineDesc(MemoryDescPtr desc) {
if (desc->getPrecision() != element::string) {
OPENVINO_THROW("[CPU] StringMemory supports String type only.");
}
if (!desc->hasDefinedMaxSize()) {
OPENVINO_THROW("[CPU] StringMemory cannot reset descriptor. Memory upper bound is unknown.");
}
m_mem_desc = desc;
const auto string_size = m_mem_desc->getShape().getElementsCount();
m_manager->resize(string_size);
}
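This supports the dynamic-shape flow: constructed with an undefined descriptor, StringMemory allocates nothing, and redefineDesc() grows the storage once the real shape is known. A hedged sketch (engine and both descriptors hypothetical):

    StringMemory mem(engine, dynamic_desc);  // descriptor not defined yet: the constructor returns before allocating
    mem.redefineDesc(static_desc);           // MemoryDescPtr with a defined max size: storage grows to getElementsCount() strings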
void StringMemory::nullify() {
auto data_ptr = m_manager->getStringPtr();
if (data_ptr != nullptr) {
std::fill(data_ptr, data_ptr + m_manager->getStrLen(), OvString());
}
}
bool StringMemory::isAllocated() const noexcept {
if (getData()) {
return true;
}
if (!m_mem_desc) {
return false;
}
if (!(m_mem_desc->isDefined())) {
return true;
}
if (m_mem_desc->getCurrentMemSize() == 0) {
return true;
}
return false;
}
MemoryMngrPtr StringMemory::getMemoryMngr() const {
OPENVINO_THROW("Unexpected call of StringMemory::getMemoryMngr()");
}
dnnl::memory StringMemory::getPrimitive() const {
OPENVINO_THROW("Unexpected call of StringMemory::getPrimitive()");
}
void StringMemory::StringMemoryMngr::setExtBuff(OvString* ptr, size_t size) {
m_use_external_storage = true;
m_str_upper_bound = size;
m_data = decltype(m_data)(ptr, release);
}
StringMemory::OvString* StringMemory::StringMemoryMngr::getStringPtr() const noexcept {
return m_data.get();
}
bool StringMemory::StringMemoryMngr::resize(size_t size) {
bool sizeChanged = false;
if (size > m_str_upper_bound) {
auto ptr = new (std::nothrow) OvString[size]; // nothrow allocation, otherwise the null check below is dead code
if (!ptr) {
OPENVINO_THROW("Failed to allocate ", size, " string elements");
}
m_str_upper_bound = size;
m_use_external_storage = false;
m_data = decltype(m_data)(ptr, destroy);
sizeChanged = true;
}
return sizeChanged;
}
bool StringMemory::StringMemoryMngr::hasExtBuffer() const noexcept {
return m_use_external_storage;
}
size_t StringMemory::StringMemoryMngr::getStrLen() const noexcept {
return m_str_upper_bound;
}
void StringMemory::StringMemoryMngr::destroy(OvString* ptr) {
delete[] ptr;
}
void* StringMemory::StringMemoryMngr::getRawPtr() const noexcept {
return reinterpret_cast<void *>(m_data.get());
}
/////////////// DnnlMemoryMngr ///////////////
void* DnnlMemoryMngr::getRawPtr() const noexcept {
return m_pMemMngr->getRawPtr();
}
@@ -348,7 +455,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo
// ========================
// Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
// but with ability to skip pads zeroing.
m_prim = memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE);
m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE);
//
// ========================
if (pads_zeroing)

View File

@@ -4,19 +4,8 @@
#pragma once
#include "ie_layouts.h"
#include "memory_desc/cpu_memory_desc.h"
#include "dnnl_extension_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include <onednn/dnnl.h>
#include <cpu_shape.h>
#include "memory_desc/dnnl_memory_desc.h"
#include <string>
#include <functional>
#include <memory>
#include <vector>
/**
* @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level.
@@ -364,8 +353,90 @@ private:
}
};
class StringMemory : public IMemory {
public:
using OvString = ov::element_type_traits<ov::element::string>::value_type;
class StringMemoryMngr {
public:
StringMemoryMngr() : m_data(nullptr, release) {}
OvString* getStringPtr() const noexcept;
void setExtBuff(OvString* ptr, size_t size);
size_t getStrLen() const noexcept;
void* getRawPtr() const noexcept;
bool resize(size_t size /* string elements number */);
bool hasExtBuffer() const noexcept;
private:
bool m_use_external_storage = false;
size_t m_str_upper_bound = 0lu;
std::unique_ptr<OvString, void (*)(OvString *)> m_data;
static void release(OvString* ptr) {} // no-op deleter: an external buffer remains owned by the caller
static void destroy(OvString* ptr); // delete[] deleter for internally allocated arrays
};
using StringMemoryMngrPtr = std::shared_ptr<StringMemoryMngr>;
StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data = nullptr);
StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const void* data = nullptr)
: StringMemory(engine, desc.clone(), data) {}
StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const StringMemoryMngrPtr& manager)
: m_engine(engine), m_mem_desc(desc), m_manager(manager) {}
StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryMngrPtr& manager)
: StringMemory(engine, desc.clone(), manager) {}
bool isAllocated() const noexcept override;
const MemoryDesc& getDesc() const override {
return *m_mem_desc;
}
MemoryDescPtr getDescPtr() const override {
return m_mem_desc;
}
void* getData() const override;
size_t getSize() const override { // In bytes
return m_size;
}
const Shape& getShape() const override {
return m_mem_desc->getShape();
}
const VectorDims& getStaticDims() const override {
return m_mem_desc->getShape().getStaticDims();
}
void redefineDesc(MemoryDescPtr desc) override;
void load(const IMemory& src, bool ftz = false) const override;
MemoryMngrPtr getMemoryMngr() const override;
StringMemoryMngrPtr getStringMemoryMngrPtr() const {
return m_manager;
}
dnnl::memory getPrimitive() const override;
void nullify() override;
private:
dnnl::engine m_engine;
MemoryDescPtr m_mem_desc;
StringMemoryMngrPtr m_manager;
size_t m_size = 0lu; // in bytes; stays zero until the descriptor is defined
};
using MemoryPtr = std::shared_ptr<IMemory>;
using MemoryCPtr = std::shared_ptr<const IMemory>;
using StringMemoryPtr = std::shared_ptr<StringMemory>;
} // namespace intel_cpu
} // namespace ov
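Note that OvString resolves to std::string. The release/destroy pair above is an own-versus-borrow switch expressed through unique_ptr's deleter; a standalone sketch of the idiom (names hypothetical, not part of the plugin):

    #include <memory>
    #include <string>

    using Deleter = void (*)(std::string*);
    void borrow(std::string*) {}                  // external buffer: the caller frees it
    void destroy(std::string* p) { delete[] p; }  // internal buffer: delete the array

    int main() {
        std::unique_ptr<std::string, Deleter> data(nullptr, borrow);               // borrowing state
        data = std::unique_ptr<std::string, Deleter>(new std::string[4], destroy); // owning state
    }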

View File

@@ -56,7 +56,6 @@
using namespace dnnl;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace ov {
namespace intel_cpu {
@@ -605,6 +604,43 @@ void Graph::AllocateWithReuse() {
erase = true;
break;
}
// Special allocation for string tensors
if (edge->getDesc().getPrecision() == element::string && edge->getStatus() == Edge::Status::NeedAllocation) {
StringMemory::StringMemoryMngrPtr mngr;
if (edge->getParent()->isConstant()) {
if (edge->getParent()->getType() == Type::Input) {
auto constNode = static_cast<node::Input *>(edge->getParent().get());
edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
} else {
edge->externalAllocate(context->getWeightsCache());
}
auto stringMemory = dynamic_cast<StringMemory *>(edge->getMemoryPtr().get());
OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '",
edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory.");
mngr = stringMemory->getStringMemoryMngrPtr();
} else {
auto memory = std::make_shared<StringMemory>(getEngine(), edge->getDesc());
edge->reuse(memory);
mngr = memory->getStringMemoryMngrPtr();
}
for (auto& edge_c : cluster) {
if (edge_c == edge) {
continue;
}
OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string.");
if (edge_c->getStatus() == Edge::Status::NotAllocated) {
auto memory = std::make_shared<StringMemory>(getEngine(), edge_c->getDesc(), mngr);
edge_c->reuse(memory);
} else {
OPENVINO_THROW("[CPU] String tensors allocation in the cluster. Edge between nodes '", edge_c->getParent()->getName(), "' and '",
edge_c->getChild()->getName(), "' has an unexpected status: ", static_cast<int>(edge_c->getStatus()));
}
}
erase = true;
continue;
}
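The essential point above is that every edge of a string cluster shares one StringMemoryMngr, so a resize performed through any edge is visible to all of them. A minimal sketch of the sharing scheme (desc hypothetical, getEngine() as in the surrounding code):

    auto base = std::make_shared<StringMemory>(getEngine(), desc);        // NeedAllocation edge: owns the storage
    auto mngr = base->getStringMemoryMngrPtr();
    auto view = std::make_shared<StringMemory>(getEngine(), desc, mngr);  // NotAllocated edge: same storage
    // base->getData() == view->getData()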
// Special allocation for constants
if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) {
continue;
@@ -904,7 +940,10 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& inp
auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input);
auto actualDesc = edgeMemory->getDescPtr();
if (!actualDesc->isCompatible(*ext_tensor_desc)) {
if (actualDesc->getPrecision() == element::string) {
StringMemory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr);
edgeMemory->load(ext_mem);
} else if (!actualDesc->isCompatible(*ext_tensor_desc)) {
Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
edgeMemory->load(ext_mem, false);
} else {
@@ -983,7 +1022,10 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>&
// That is the same memory. No need to copy
if (ext_blob_ptr == intr_blob_ptr) continue;
if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
if (actualDesc->getPrecision() == element::string) {
StringMemory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr);
outBloMem.load(intr_blob);
} else if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false);
outBloMem.load(intr_blob, false);
} else {

View File

@@ -26,6 +26,8 @@
#include "utils/cpu_utils.hpp"
#include "utils/general_utils.h"
using OvString = ov::element_type_traits<ov::element::string>::value_type;
namespace ov {
namespace intel_cpu {
SyncInferRequest::SyncInferRequest(std::shared_ptr<const CompiledModel> compiled_model)
@@ -173,11 +175,17 @@ std::vector<ov::ProfilingInfo> SyncInferRequest::get_profiling_info() const {
}
static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr<ov::ITensor>& tensor) {
auto size = tensor->get_byte_size();
auto& mem = edge->getMemory();
auto memMngr = mem.getMemoryMngr();
OPENVINO_ASSERT(memMngr);
memMngr->setExtBuff(tensor->data(), size);
if (tensor->get_element_type() == element::string) {
auto memMngr = dynamic_cast<const StringMemory &>(mem).getStringMemoryMngrPtr();
OPENVINO_ASSERT(memMngr);
memMngr->setExtBuff(tensor->data<OvString>(), tensor->get_size());
} else {
auto memMngr = mem.getMemoryMngr();
OPENVINO_ASSERT(memMngr);
memMngr->setExtBuff(tensor->data(), tensor->get_byte_size());
}
}
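In effect, set_tensor() can now rebind an edge to a user-provided std::string buffer without copying. A hedged caller-side sketch (assuming ov::Tensor supports string allocation, which this feature series adds, and an existing infer_request):

    ov::Tensor t(ov::element::string, ov::Shape{2});
    t.data<std::string>()[0] = "hello";
    t.data<std::string>()[1] = "world";
    infer_request.set_input_tensor(t);  // the edge's StringMemoryMngr is pointed at t's strings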
void SyncInferRequest::change_default_ptr() {

View File

@@ -3,25 +3,19 @@
//
#include "cpu_convert.h"
#include "cpu_memcpy.h"
#include "openvino/core/parallel.hpp"
#include <utils/bfloat16.hpp>
#include <utils/general_utils.h>
#include <selective_build.h>
#include <openvino/core/type/float16.hpp>
#include <algorithm>
#include <type_traits>
#include <tuple>
#include <cmath>
#include <onednn/dnnl.h>
#include "utils/bfloat16.hpp"
#if defined(OPENVINO_ARCH_X86_64)
#include "nodes/kernels/x64/jit_kernel.hpp"
#include <cpu/x64/jit_generator.hpp>
#else
#include "cpu_memory.h"
#include "openvino/core/type/element_type_traits.hpp"
#include "selective_build.h"
#include "utils/general_utils.h"
#endif
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace {
@@ -583,7 +577,11 @@ void cpu_convert(const void *srcPtr,
if (srcPrc == dstPrc && srcPrc == interimPrc) {
const size_t L2_cache_size = dnnl::utils::get_cache_size(2, true);
const size_t totalSize = size * dstPrc.size();
if (totalSize >= L2_cache_size) {
if (srcPrc == element::string) {
auto str_src = reinterpret_cast<const StringMemory::OvString *>(srcPtr);
auto str_dst = reinterpret_cast<StringMemory::OvString *>(dstPtr);
std::copy(str_src, str_src + size, str_dst);
} else if (totalSize >= L2_cache_size) {
auto src = static_cast<const uint8_t *>(srcPtr);
auto dst = static_cast<uint8_t *>(dstPtr);
parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
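The dedicated string branch above is needed because std::string is not trivially copyable: the byte-wise fast paths would shallow-copy heap pointers. A standalone illustration (not part of the plugin):

    #include <algorithm>
    #include <string>

    std::string src[2] = {"abc", "def"};
    std::string dst[2];
    std::copy(src, src + 2, dst);           // correct: element-wise operator=
    // std::memcpy(dst, src, sizeof(src));  // undefined behavior for std::string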

View File

@@ -3,28 +3,12 @@
//
#include "input.h"
#include "common/cpu_memcpy.h"
#include <dnnl_extension_utils.h>
#include <string>
#include <tuple>
#include <algorithm>
#include <cmath>
#include <utils/general_utils.h>
#include "cpu/x64/jit_generator.hpp"
#include "openvino/core/parallel.hpp"
#include <ie_ngraph_utils.hpp>
#include "caseless.hpp"
#include "common/cpu_memcpy.h"
#include "common/cpu_convert.h"
#include "utils/cpu_utils.hpp"
#include <cpu/x64/jit_generator.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "shape_inference/shape_inference_pass_through.hpp"
using namespace dnnl;
using namespace InferenceEngine;
using namespace details;
using namespace ov::op;
using namespace dnnl::impl::cpu::x64;
using namespace Xbyak;
@@ -234,11 +218,11 @@ jit_has_subnormals_base::fn_t jit_has_subnormals_function() {
Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
: Node(op, context, PassThroughShapeInferFactory()) {
if (!one_of(op->get_type_info(),
v0::Parameter::get_type_info_static(),
v0::Constant::get_type_info_static(),
v0::Result::get_type_info_static(),
v3::ReadValue::get_type_info_static(),
v6::ReadValue::get_type_info_static()))
op::v0::Parameter::get_type_info_static(),
op::v0::Constant::get_type_info_static(),
op::v0::Result::get_type_info_static(),
op::v3::ReadValue::get_type_info_static(),
op::v6::ReadValue::get_type_info_static()))
OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ",
op->get_type_name(),
" with name ",
@@ -246,7 +230,7 @@ Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr conte
constant = ConstantType::NoConst;
constOp = ov::as_type_ptr<ov::op::v0::Constant>(op);
constOp = ov::as_type_ptr<op::v0::Constant>(op);
if (constOp) {
constant = ConstantType::Const;
cloneBlobIfRequired();
@@ -275,13 +259,29 @@ void Input::cloneBlobIfRequired() {
// but ngraph Constant uses actual bitWidth for data storage allocation
// in that case we make a copy to avoid overflow
if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) {
memory = std::make_shared<Memory>(getEngine(), memDesc, constOp->get_data_ptr());
if (constOp->get_element_type() == element::string) {
memory = std::make_shared<StringMemory>(getEngine(), memDesc, constOp->get_data_ptr<element::string>());
} else {
memory = std::make_shared<Memory>(getEngine(), memDesc, constOp->get_data_ptr());
}
} else {
memory = std::make_shared<Memory>(getEngine(), memDesc);
memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size());
if (constOp->get_element_type() == element::string) {
memory = std::make_shared<StringMemory>(getEngine(), memDesc);
auto src = constOp->get_data_ptr<StringMemory::OvString>();
auto dst = reinterpret_cast<StringMemory::OvString *>(memory->getData());
std::copy(src, src + size, dst);
} else {
memory = std::make_shared<Memory>(getEngine(), memDesc);
memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size());
}
}
MemoryPtr ptr = std::make_shared<StaticMemory>(getEngine(), memDesc);
MemoryPtr ptr;
if (memDesc.getPrecision() == element::string) {
ptr = std::make_shared<StringMemory>(getEngine(), memDesc);
} else {
ptr = std::make_shared<StaticMemory>(getEngine(), memDesc);
}
ptr->load(*memory.get(), needFlushDenormalsToZero);
return ptr;
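For context, ov::op::v0::Constant gives typed access to string payloads, which is what the get_data_ptr<element::string>() calls above rely on; a hedged standalone sketch (values hypothetical):

    auto c = ov::op::v0::Constant::create(ov::element::string, ov::Shape{2},
                                          std::vector<std::string>{"a", "bc"});
    const std::string* p = c->get_data_ptr<ov::element::string>();  // two std::string elements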
@@ -381,7 +381,7 @@ void Input::cloneBlobIfRequired() {
memoryPtr = std::const_pointer_cast<const IMemory>(ptr);
// IRs already have all subnormals flushed to zero, but in
// read_model scenario with directly loaded original model still can have subnormals
} else if (isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) {
} else if (prec != element::string && isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) {
memoryPtr = std::make_shared<Memory>(getEngine(), memDesc, constOp->get_data_ptr());
} else {
memoryPtr = std::const_pointer_cast<const IMemory>(cloneBlob());
@@ -420,14 +420,14 @@ MemoryCPtr Input::getMemoryPtr() const {
void Input::getSupportedDescriptors() {
if (getType() == Type::Input) {
if (!getParentEdges().empty())
OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of input edges.");
if (getChildEdges().empty())
OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of output edges.");
} else if (getType() == Type::Output) {
if (getParentEdges().size() != 1)
OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of input edges.");
if (!getChildEdges().empty())
OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of output edges.");
}
}
@@ -446,19 +446,19 @@ void Input::createPrimitive() {
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->isAllocated())
OPENVINO_THROW("Destination memory didn't allocate for node ", getName()
, " to node ", getChildEdgeAt(i)->getChild()->getName(), ".");
THROW_CPU_NODE_ERR("has unallocated memory object at port ", i,
" to node ", getChildEdgeAt(i)->getChild()->getName(), ".");
}
for (size_t i = 0; i < getParentEdges().size(); i++) {
auto srcMemPtr = getParentEdgeAt(i)->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->isAllocated())
OPENVINO_THROW("Destination memory didn't allocate for node ", getName()
, " from node ", getParentEdgeAt(i)->getParent()->getName(), ".");
THROW_CPU_NODE_ERR("has unallocated memory object at port ", i,
" from node ", getParentEdgeAt(i)->getParent()->getName(), ".");
}
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), ".");
THROW_CPU_NODE_ERR("doesn't have selected primitive descriptor.");
}
bool Input::created() const {

View File

@@ -4,10 +4,8 @@
#pragma once
#include <ie_common.h>
#include <node.h>
#include <openvino/op/constant.hpp>
#include <string>
namespace ov {
namespace intel_cpu {

View File

@@ -3,13 +3,7 @@
//
#include "reference.h"
#include "common/cpu_memcpy.h"
#include <ie_ngraph_utils.hpp>
#include "openvino/core/shape_util.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace ov {
namespace intel_cpu {
@@ -94,7 +88,13 @@ void Reference::executeDynamicImpl(dnnl::stream strm) {
if (memory->getSize() != tensor.get_byte_size()) {
THROW_CPU_NODE_ERR("output tensor data size mismatch occurred during the inference on output port number ", i);
}
cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size());
if (tensor.get_element_type() == element::string) {
auto srcPtr = tensor.data<StringMemory::OvString>();
auto dstPtr = reinterpret_cast<StringMemory::OvString *>(memory->getData());
std::copy(srcPtr, srcPtr + tensor.get_size(), dstPtr);
} else {
cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size());
}
}
}
}

View File

@@ -3,24 +3,14 @@
//
#include "reorder.h"
#include <memory>
#include <string>
#include <algorithm>
#include <dnnl_types.h>
#include <dnnl_extension_utils.h>
#include "openvino/core/parallel.hpp"
#include "utils/general_utils.h"
#include <cpu/x64/cpu_isa_traits.hpp>
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/cpu_convert.h"
#include "nodes/common/reorder_prim.h"
#include "convert.h"
#include <common/primitive_hashing_utils.hpp>
#include <shape_inference/shape_inference_pass_through.hpp>
#include "executors/transpose_list.hpp"
using namespace dnnl;
using namespace InferenceEngine;
#include "convert.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include "nodes/common/cpu_convert.h"
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/reorder_prim.h"
#include "openvino/core/parallel.hpp"
#include "shape_inference/shape_inference_pass_through.hpp"
namespace ov {
namespace intel_cpu {
@@ -32,7 +22,7 @@ bool Reorder::isExecutable() const {
Reorder::Reorder(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :
Node(op, context, PassThroughShapeInferFactory()) {
OPENVINO_THROW("Can't create reorder node from ngraph node");
THROW_CPU_NODE_ERR("could not create CPU node from Core node.");
}
Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) :
@@ -40,9 +30,9 @@ Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) :
void Reorder::getSupportedDescriptors() {
if (getParentEdges().size() != 1)
OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of input edges.");
if (getChildEdges().empty())
OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
THROW_CPU_NODE_ERR("has incorrect number of output edges.");
}
void Reorder::initSupportedPrimitiveDescriptors() {
@@ -71,7 +61,7 @@
config.inConfs[0].setMemDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc());
config.outConfs[0].setMemDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc());
} else {
OPENVINO_THROW("Cannot initialize supported PDs for Reorder node with name `", getName(), "`");
THROW_CPU_NODE_ERR("could not initialize supported PDs.");
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder);
@@ -83,9 +73,7 @@
}
if (isDynamic && (config.inConfs[0].getMemDesc()->getShape().getRank() != config.outConfs[0].getMemDesc()->getShape().getRank()))
OPENVINO_THROW("Reorder node with name: ",
getName(),
" doesn't support case when input and output shapes have different rank and dynamic");
THROW_CPU_NODE_ERR("doesn't support case when input and output shapes have different rank and dynamic.");
if (!isOptimized) {
const auto &inShape = getInputShapeAtPort(0);
if (one_of(inShape.getRank(), 4u, 5u) &&
@@ -95,7 +83,7 @@
config.outConfs[0].getMemDesc()->getPrecision() == ov::element::f32) {
// oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation
isNspc2NcspCase = true;
} else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) &&
} else if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) &&
one_of(inShape.getRank(), 4u, 5u) &&
config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::ncsp) &&
config.outConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) &&
@@ -180,11 +168,11 @@ void Reorder::prepareParams() {
auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->isAllocated())
OPENVINO_THROW("Destination memory didn't allocate.");
THROW_CPU_NODE_ERR("has unallocated destination memory object.");
if (!srcMemPtr || !srcMemPtr->isAllocated())
OPENVINO_THROW("Input memory didn't allocate.");
THROW_CPU_NODE_ERR("has unallocated input memory object.");
if (getSelectedPrimitiveDescriptor() == nullptr)
OPENVINO_THROW("Preferable primitive descriptor is not set.");
THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
auto isSupportedDesc = [](const MemoryDesc& desc) {
if (!desc.isDefined()) {
@@ -239,11 +227,11 @@ }
}
if (!canUseNcsp2Nspc && !canUseNspc2Ncsp) {
if (!dstMemPtr || !dstMemPtr->isAllocated())
OPENVINO_THROW("Destination memory didn't allocate.");
THROW_CPU_NODE_ERR("has unallocated destination memory object.");
if (!srcMemPtr || !srcMemPtr->isAllocated())
OPENVINO_THROW("Input memory didn't allocate.");
THROW_CPU_NODE_ERR("has unallocated input memory object.");
if (getSelectedPrimitiveDescriptor() == nullptr)
OPENVINO_THROW("Preferable primitive descriptor is not set.");
THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
createReorderPrimitive(srcMemPtr->getDescWithType<DnnlMemoryDesc>()->getDnnlDesc(), srcMemPtr->getData(),
dstMemPtr->getDescWithType<DnnlMemoryDesc>()->getDnnlDesc(), dstMemPtr->getData());
@@ -256,7 +244,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc,
void* dstPtr) {
auto selectedPD = getSelectedPrimitiveDescriptor();
if (!selectedPD)
OPENVINO_THROW("Preferable primitive descriptor is not set.");
THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
const auto engine = getEngine();
src_blocked = std::make_shared<Memory>(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false);
@@ -301,7 +289,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc,
auto result = getReorderPrim(context->getParamsCache(), getEngine(), src_desc, dst_desc);
if (!result) {
OPENVINO_THROW("Cannot create reorder primitive: unsupported reorder case");
THROW_CPU_NODE_ERR("could not create reorder primitive: unsupported reorder case.");
}
prim = result;
@@ -419,7 +407,7 @@ void Reorder::execute(dnnl::stream strm) {
if (prim) {
prim.execute(strm, primArgs);
} else {
OPENVINO_THROW("Reorder node with name ", getName(), " doesn't have an initialized primitive");
THROW_CPU_NODE_ERR("doesn't have an initialized primitive.");
}
}
}
@@ -448,11 +436,17 @@ void Reorder::reorderData(const IMemory &input, const IMemory &output, MultiCach
}
if (input.getDesc().isCompatible(output.getDesc())) {
auto srcPtr = static_cast<uint8_t*>(input.getData());
auto dstPtr = static_cast<uint8_t*>(output.getData());
if (input.getDesc().getPrecision() == element::string) {
auto srcPtr = reinterpret_cast<StringMemory::OvString *>(input.getData());
auto dstPtr = reinterpret_cast<StringMemory::OvString *>(output.getData());
std::copy(srcPtr, srcPtr + output.getShape().getElementsCount(), dstPtr);
} else {
auto srcPtr = static_cast<uint8_t*>(input.getData());
auto dstPtr = static_cast<uint8_t*>(output.getData());
auto copySize = output.getSize();
cpu_memcpy(dstPtr, srcPtr, copySize);
auto copySize = output.getSize();
cpu_memcpy(dstPtr, srcPtr, copySize);
}
} else {
dnnl::reorder reorder;
std::vector<uint8_t> tmpBuff;

View File

@@ -4,13 +4,11 @@
#pragma once
#include <ie_common.h>
#include <node.h>
#include <string>
#include <memory>
#include <vector>
#if defined(OV_CPU_ARM_ENABLE_FP16)
#include "nodes/executors/transpose.hpp"
#include <utils/general_utils.h>
#endif
namespace ov {
namespace intel_cpu {

View File

@@ -533,7 +533,8 @@ Engine::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::A
ov::element::Type_t::f16,
ov::element::Type_t::f32,
ov::element::Type_t::f64,
ov::element::Type_t::boolean};
ov::element::Type_t::boolean,
ov::element::Type_t::string};
if (!supported_precisions.count(input_precision)) {
OPENVINO_THROW_NOT_IMPLEMENTED("CPU plugin: Input image format ",
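With string in the supported set, compile_model() no longer rejects models with string inputs; a hedged end-to-end sketch (model path hypothetical):

    ov::Core core;
    auto model = core.read_model("model_with_string_input.xml");
    auto compiled = core.compile_model(model, "CPU");
    auto request = compiled.create_infer_request();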

View File

@@ -0,0 +1,248 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// Test model topology:
//   Parameter (string) and Constant (string) both feed CustomOpStringString (Extension),
//   which forwards them to two string outputs:
//     output 0 -> CustomOpStringU8 (Extension, string -> u8) -> BitwiseNot (CPU, u8) -> Result
//     output 1 -> Result (string)
//   The Parameter and the Constant also each feed a separate
//   CustomOpStringU8 (Extension) -> Result (u8) branch.
#include "common_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace CPUTestUtils;
using namespace ov::test;
namespace SubgraphTestsDefinitions {
using CustomOpStringCPUTestParams = std::tuple<ElementType, InputShape>;
class CustomOpStringString : public ov::op::Op {
public:
OPENVINO_OP("CustomOpStringString");
CustomOpStringString() = default;
CustomOpStringString(const ov::OutputVector& args) : Op(args) {
constructor_validate_and_infer_types();
}
void validate_and_infer_types() override {
const auto& inputs_count = input_values().size();
OPENVINO_ASSERT(inputs_count == 2, "Input count must be 2, Got: ", inputs_count);
OPENVINO_ASSERT(get_input_element_type(0) == ov::element::Type_t::string, "The input must be string.");
OPENVINO_ASSERT(get_input_element_type(1) == ov::element::Type_t::string, "The input must be string.");
set_output_size(2);
set_output_type(0, ov::element::Type_t::string, get_input_partial_shape(0));
set_output_type(1, ov::element::Type_t::string, get_input_partial_shape(1));
}
std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
OPENVINO_ASSERT(new_args.size() == 2, "Incorrect number of new arguments: ", new_args.size(), ". 2 is expected.");
return std::make_shared<CustomOpStringString>(new_args);
}
bool visit_attributes(ov::AttributeVisitor& visitor) override { return true; }
bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
for (size_t i = 0lu; i < inputs.size(); i++) {
OPENVINO_ASSERT(inputs[i].get_shape().size() == static_cast<size_t>(get_input_partial_shape(i).rank().get_length()),
"Invalid input shape rank: ", inputs[i].get_shape().size());
}
for (size_t i = 0lu; i < outputs.size(); i++) {
OPENVINO_ASSERT(outputs[i].get_shape().size() == static_cast<size_t>(get_output_partial_shape(i).rank().get_length()),
"Invalid outputs shape rank: ", outputs[i].get_shape().size());
}
auto in_data_0 = inputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
auto in_data_1 = inputs[1].data<ov::element_type_traits<ov::element::string>::value_type>();
auto out_data_0 = outputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
auto out_data_1 = outputs[1].data<ov::element_type_traits<ov::element::string>::value_type>();
const auto el_num_0 = outputs[0].get_size();
for (size_t i = 0lu; i < el_num_0; i++) {
out_data_0[i] = in_data_0[i];
}
const auto el_num_1 = outputs[1].get_size();
for (size_t i = 0lu; i < el_num_1; i++) {
out_data_1[i] = in_data_1[i];
}
return true;
}
bool evaluate(ov::TensorVector& output_values,
const ov::TensorVector& input_values,
const ov::EvaluationContext& evaluationContext) const override {
return evaluate(output_values, input_values);
}
bool has_evaluate() const override { return true; }
};
class CustomOpStringU8 : public ov::op::Op {
public:
OPENVINO_OP("CustomOpStringU8");
CustomOpStringU8() = default;
CustomOpStringU8(const ov::OutputVector& args) : Op(args) {
constructor_validate_and_infer_types();
}
void validate_and_infer_types() override {
const auto& inputs_count = input_values().size();
OPENVINO_ASSERT(inputs_count == 1, "Input count must be 1, Got: ", inputs_count);
OPENVINO_ASSERT(get_input_element_type(0) == ov::element::Type_t::string, "The input must be string.");
set_output_size(1);
set_output_type(0, ov::element::Type_t::u8, get_input_partial_shape(0));
}
std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments: ", new_args.size(), ". 1 is expected.");
return std::make_shared<CustomOpStringU8>(new_args);
}
bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
for (size_t i = 0lu; i < inputs.size(); i++) {
OPENVINO_ASSERT(inputs[i].get_shape().size() == static_cast<size_t>(get_input_partial_shape(i).rank().get_length()),
"Invalid input shape rank: ", inputs[i].get_shape().size());
}
for (size_t i = 0lu; i < outputs.size(); i++) {
OPENVINO_ASSERT(outputs[i].get_shape().size() == static_cast<size_t>(get_output_partial_shape(i).rank().get_length()),
"Invalid outputs shape rank: ", outputs[i].get_shape().size());
}
auto in_data_0 = inputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
auto out_data_0 = outputs[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
const auto el_num_0 = outputs[0].get_size();
for (size_t i = 0lu; i < el_num_0; i++) {
if (in_data_0[i].empty()) {
out_data_0[i] = '_';
} else {
out_data_0[i] = in_data_0[i][0];
}
}
return true;
}
bool has_evaluate() const override { return true; }
bool visit_attributes(ov::AttributeVisitor& visitor) override { return true; }
};
class CustomOpStringCPUTest : public testing::WithParamInterface<CustomOpStringCPUTestParams>,
virtual public SubgraphBaseTest,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<CustomOpStringCPUTestParams>& obj) {
ElementType in_type;
InputShape inputShape;
std::tie(in_type, inputShape) = obj.param;
std::ostringstream result;
result << "IS=" << inputShape << "_";
result << "Prc=" << in_type;
return result.str();
}
protected:
void SetUp() override {
targetDevice = utils::DEVICE_CPU;
ElementType in_type;
InputShape inputShape;
std::tie(in_type, inputShape) = this->GetParam();
init_input_shapes({inputShape});
auto in_0 = std::make_shared<ov::op::v0::Parameter>(in_type, inputDynamicShapes[0]);
auto in_1 = std::make_shared<ov::op::v0::Constant>(utils::create_and_fill_tensor(in_type, { 1, 3, 5 }));
ov::OutputVector param_outs_0({ in_0, in_1 });
auto str_str_op = std::make_shared<CustomOpStringString>(param_outs_0);
ov::OutputVector param_outs_1({ str_str_op->output(0) });
auto str_u8_op_0 = std::make_shared<CustomOpStringU8>(param_outs_1);
ov::OutputVector param_outs_2({ in_0 });
auto str_u8_op_1 = std::make_shared<CustomOpStringU8>(param_outs_2);
ov::OutputVector param_outs_3({ in_1 });
auto str_u8_op_2 = std::make_shared<CustomOpStringU8>(param_outs_3);
auto btw_not_op = std::make_shared<ov::op::v13::BitwiseNot>(str_u8_op_0->output(0));
ov::ParameterVector input_params{in_0};
ov::ResultVector results{std::make_shared<ov::op::v0::Result>(btw_not_op->output(0)),
std::make_shared<ov::op::v0::Result>(str_str_op->output(1)),
std::make_shared<ov::op::v0::Result>(str_u8_op_1->output(0)),
std::make_shared<ov::op::v0::Result>(str_u8_op_2->output(0))};
function = std::make_shared<ov::Model>(results, input_params, "CustomOpStringString");
}
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
inputs.clear();
const auto& funcInputs = function->inputs();
for (size_t i = 0lu; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
auto tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
void compare(const std::vector<ov::Tensor>& expected, const std::vector<ov::Tensor>& actual) override {
ASSERT_EQ(expected.size(), actual.size());
ASSERT_EQ(expected.size(), function->get_results().size());
auto expected_data_0 = expected[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
auto actual_data_0 = actual[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
const auto size_0 = expected[0].get_size();
for (size_t i = 0lu; i < size_0; i++) {
OPENVINO_ASSERT(expected_data_0[i] == actual_data_0[i], "At index ", i,
" expected: '", expected_data_0[i], "' actual: '", actual_data_0[i], "'");
}
auto expected_data_1 = expected[1].data<ov::element_type_traits<ov::element::string>::value_type>();
auto actual_data_1 = actual[1].data<ov::element_type_traits<ov::element::string>::value_type>();
const auto size_1 = expected[1].get_size();
for (size_t i = 0lu; i < size_1; i++) {
OPENVINO_ASSERT(expected_data_1[i] == actual_data_1[i], "At index ", i,
" expected: '", expected_data_1[i], "' actual: '", actual_data_1[i], "'");
}
}
};
TEST_P(CustomOpStringCPUTest, CompareWithRefs) {
run();
}
const std::vector<InputShape> inputShapes = {
{{}, {{2, 5}}},
{{}, {{17, 9}}},
{{-1, -1}, {{1, 3}, {5, 17}, {99, 51}}},
{{}, {{}}}
};
INSTANTIATE_TEST_SUITE_P(smoke_CustomOp,
CustomOpStringCPUTest,
::testing::Combine(::testing::Values(ElementType::string), ::testing::ValuesIn(inputShapes)),
CustomOpStringCPUTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@@ -465,6 +465,12 @@ void inline fill_data_random<InferenceEngine::Precision::BF16>(InferenceEngine::
}
OPENVINO_SUPPRESS_DEPRECATED_END
void fill_random_string(std::string* dst,
const size_t size,
const size_t len_range = 10lu,
const size_t start_from = 0lu,
const int seed = 1);
template <typename T>
typename std::enable_if<std::is_signed<T>::value, T>::type inline ie_abs(const T& val) {
return std::abs(val);

View File

@@ -578,6 +578,27 @@ void fill_tensor_random(ov::Tensor& tensor,
#undef CASE_FLOAT
}
void fill_random_string(std::string* dst,
const size_t size,
const size_t len_range,
const size_t start_from,
const int seed) {
static const int32_t char_range = 128;
testing::internal::Random random_len(seed);
testing::internal::Random random_char(seed);
for (size_t i = 0lu; i < size; i++) {
const auto len = start_from + static_cast<size_t>(random_len.Generate(len_range));
auto& str = dst[i];
str.resize(len);
for (size_t j = 0lu; j < len; j++) {
str[j] = static_cast<char>(random_char.Generate(char_range)); // draw characters from the char generator, not the length one
}
}
}
} // namespace utils
} // namespace test
} // namespace ov
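A usage sketch of the helper defined above (sizes hypothetical):

    std::vector<std::string> strings(8);
    ov::test::utils::fill_random_string(strings.data(), strings.size(),
                                        /*len_range=*/10lu, /*start_from=*/0lu, /*seed=*/1);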

View File

@@ -51,6 +51,13 @@ ov::Tensor create_and_fill_tensor(const ov::element::Type element_type,
inGenData.resolution,
inGenData.seed);
break;
case ov::element::Type_t::string:
fill_random_string(static_cast<std::string*>(tensor.data()),
tensor.get_size(),
inGenData.range,
inGenData.start_from,
inGenData.seed);
break;
default:
OPENVINO_THROW("Unsupported element type: ", element_type);
}
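With this case wired in, tests can request pre-filled string tensors directly; a hedged usage sketch (assuming default generation parameters):

    auto tensor = ov::test::utils::create_and_fill_tensor(ov::element::string, ov::Shape{2, 3});
    auto* strs = tensor.data<std::string>();  // six randomly generated std::string elements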