diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp
index 19efccff0f8..f6cc713d4e2 100644
--- a/src/plugins/intel_cpu/src/cpu_memory.cpp
+++ b/src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -2,24 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include
-#include
-#include
-#include
-
-#include
-#include
 #include "cpu_memory.h"
-#include "nodes/common/cpu_memcpy.h"
-#include "nodes/common/cpu_convert.h"
-#include "onednn/dnnl.h"
-#include "cpu_shape.h"
-#include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "nodes/reorder.h"
-#include "memory_desc/cpu_memory_desc.h"
-
-using namespace InferenceEngine;
-using namespace dnnl;
 
 namespace ov {
 namespace intel_cpu {
@@ -169,7 +153,7 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const {
         // Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
         // but with ability to skip pads zeroing.
         auto desc = MemoryDescUtils::convertToDnnlMemoryDesc(m_memObjPtr->getDescPtr());
-        m_prim = memory(desc->getDnnlDesc(), m_memObjPtr->getEngine(), DNNL_MEMORY_NONE);
+        m_prim = dnnl::memory(desc->getDnnlDesc(), m_memObjPtr->getEngine(), DNNL_MEMORY_NONE);
         //
         // ========================
         auto data = m_memObjPtr->getDataNoThrow();
@@ -288,6 +272,129 @@ void MemoryMngrRealloc::destroy(void *ptr) {
     dnnl::impl::free(ptr);
 }
 
+/////////////// StringMemory ///////////////
+
+StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data)
+        : m_engine(engine), m_mem_desc(desc) {
+    if (m_mem_desc->getPrecision() != element::string) {
+        OPENVINO_THROW("[CPU] StringMemory supports String type only.");
+    }
+
+    m_manager = std::make_shared<StringMemoryMngr>();
+
+    if (!m_mem_desc->isDefined()) {
+        return;
+    }
+
+    m_size = m_mem_desc->getCurrentMemSize();
+    const auto string_size = m_mem_desc->getShape().getElementsCount();
+
+    if (data != nullptr) {
+        auto not_const_data = const_cast<void*>(data);
+        m_manager->setExtBuff(reinterpret_cast<OvString*>(not_const_data), string_size);
+    } else {
+        m_manager->resize(string_size);
+    }
+}
+
+void StringMemory::load(const IMemory& src, bool ftz) const {
+    if (src.getDesc().getPrecision() != element::string) {
+        OPENVINO_THROW("[CPU] String memory cannot load a non-string object.");
+    }
+
+    transferData(src, *this, false);
+}
+
+void* StringMemory::getData() const {
+    return m_manager->getRawPtr();
+}
+
+void StringMemory::redefineDesc(MemoryDescPtr desc) {
+    if (desc->getPrecision() != element::string) {
+        OPENVINO_THROW("[CPU] StringMemory supports String type only.");
+    }
+    if (!desc->hasDefinedMaxSize()) {
+        OPENVINO_THROW("[CPU] StringMemory cannot reset descriptor. Memory upper bound is unknown.");
+    }
+
+    m_mem_desc = desc;
+    const auto string_size = m_mem_desc->getShape().getElementsCount();
+    m_manager->resize(string_size);
+}
+
+void StringMemory::nullify() {
+    auto data_ptr = m_manager->getStringPtr();
+    if (data_ptr != nullptr) {
+        std::fill(data_ptr, data_ptr + m_manager->getStrLen(), OvString());
+    }
+}
+
+bool StringMemory::isAllocated() const noexcept {
+    if (getData()) {
+        return true;
+    }
+    if (!m_mem_desc) {
+        return false;
+    }
+    if (!(m_mem_desc->isDefined())) {
+        return true;
+    }
+    if (m_mem_desc->getCurrentMemSize() == 0) {
+        return true;
+    }
+    return false;
+}
+
+MemoryMngrPtr StringMemory::getMemoryMngr() const {
+    OPENVINO_THROW("Unexpected call of StringMemory::getMemoryMngr()");
+}
+
+dnnl::memory StringMemory::getPrimitive() const {
+    OPENVINO_THROW("Unexpected call of StringMemory::getPrimitive()");
+}
+
+void StringMemory::StringMemoryMngr::setExtBuff(OvString* ptr, size_t size) {
+    m_use_external_storage = true;
+    m_str_upper_bound = size;
+    m_data = decltype(m_data)(ptr, release);
+}
+
+StringMemory::OvString* StringMemory::StringMemoryMngr::getStringPtr() const noexcept {
+    return m_data.get();
+}
+
+bool StringMemory::StringMemoryMngr::resize(size_t size) {
+    bool sizeChanged = false;
+    if (size > m_str_upper_bound) {
+        auto ptr = new OvString[size];
+        if (!ptr) {
+            OPENVINO_THROW("Failed to allocate ", size, " string elements");
+        }
+        m_str_upper_bound = size;
+        m_use_external_storage = false;
+        m_data = decltype(m_data)(ptr, destroy);
+        sizeChanged = true;
+    }
+    return sizeChanged;
+}
+
+bool StringMemory::StringMemoryMngr::hasExtBuffer() const noexcept {
+    return m_use_external_storage;
+}
+
+size_t StringMemory::StringMemoryMngr::getStrLen() const noexcept {
+    return m_str_upper_bound;
+}
+
+void StringMemory::StringMemoryMngr::destroy(OvString* ptr) {
+    delete[] ptr;
+}
+
+void* StringMemory::StringMemoryMngr::getRawPtr() const noexcept {
+    return reinterpret_cast<void*>(m_data.get());
+}
+
+/////////////// DnnlMemoryMngr ///////////////
+
 void* DnnlMemoryMngr::getRawPtr() const noexcept {
     return m_pMemMngr->getRawPtr();
 }
@@ -348,7 +455,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo
     // ========================
     // Equivalent of constructor memory(const primitive_desc &desc, void *hdl)
     // but with ability to skip pads zeroing.
-    m_prim = memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE);
+    m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE);
     //
     // ========================
     if (pads_zeroing)
diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h
index 4b3b8c5ca30..b734e3b000c 100644
--- a/src/plugins/intel_cpu/src/cpu_memory.h
+++ b/src/plugins/intel_cpu/src/cpu_memory.h
@@ -4,19 +4,8 @@
 
 #pragma once
 
-#include "ie_layouts.h"
-#include "memory_desc/cpu_memory_desc.h"
-#include "dnnl_extension_utils.h"
-#include "memory_desc/cpu_memory_desc_utils.h"
-#include
-#include
-
 #include "memory_desc/dnnl_memory_desc.h"
-#include
-#include
-#include
-#include
 
 /**
  * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level.
@@ -364,8 +353,90 @@ private:
     }
 };
 
+class StringMemory : public IMemory {
+public:
+    using OvString = ov::element_type_traits<ov::element::string>::value_type;
+
+    class StringMemoryMngr {
+    public:
+        StringMemoryMngr() : m_data(nullptr, release) {}
+        OvString* getStringPtr() const noexcept;
+        void setExtBuff(OvString* ptr, size_t size);
+        size_t getStrLen() const noexcept;
+        void* getRawPtr() const noexcept;
+        bool resize(size_t size /* string elements number */);
+        bool hasExtBuffer() const noexcept;
+
+    private:
+        bool m_use_external_storage = false;
+        size_t m_str_upper_bound = 0lu;
+        std::unique_ptr<OvString[], void (*)(OvString*)> m_data;
+
+        static void release(OvString* ptr) {}
+        static void destroy(OvString* ptr);
+    };
+
+    using StringMemoryMngrPtr = std::shared_ptr<StringMemoryMngr>;
+
+    StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data = nullptr);
+
+    StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const void* data = nullptr)
+        : StringMemory(engine, desc.clone(), data) {}
+
+    StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const StringMemoryMngrPtr& manager)
+        : m_engine(engine), m_mem_desc(desc), m_manager(manager) {}
+
+    StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryMngrPtr& manager)
+        : StringMemory(engine, desc.clone(), manager) {}
+
+    bool isAllocated() const noexcept override;
+
+    const MemoryDesc& getDesc() const override {
+        return *m_mem_desc;
+    }
+
+    MemoryDescPtr getDescPtr() const override {
+        return m_mem_desc;
+    }
+
+    void* getData() const override;
+
+    size_t getSize() const override { // In bytes
+        return m_size;
+    }
+
+    const Shape& getShape() const override {
+        return m_mem_desc->getShape();
+    }
+
+    const VectorDims& getStaticDims() const override {
+        return m_mem_desc->getShape().getStaticDims();
+    }
+
+    void redefineDesc(MemoryDescPtr desc) override;
+
+    void load(const IMemory& src, bool ftz = false) const override;
+
+    MemoryMngrPtr getMemoryMngr() const override;
+
+    StringMemoryMngrPtr getStringMemoryMngrPtr() const {
+        return m_manager;
+    }
+
+    dnnl::memory getPrimitive() const override;
+
+    void nullify() override;
+
+private:
+    dnnl::engine m_engine;
+    MemoryDescPtr m_mem_desc;
+    StringMemoryMngrPtr m_manager;
+    size_t m_size = 0lu;
+};
+
 using MemoryPtr = std::shared_ptr<IMemory>;
 using MemoryCPtr = std::shared_ptr<const IMemory>;
+using StringMemoryPtr = std::shared_ptr<StringMemory>;
 
 }   // namespace intel_cpu
 }   // namespace ov
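Note on the new class: StringMemory stores std::string elements rather than raw bytes, so ownership is handled by StringMemoryMngr with either an owning deleter (destroy) or a no-op deleter (release) for externally provided buffers. A minimal usage sketch, assuming a pre-existing dnnl::engine `eng` and a string-precision memory descriptor `desc` (both hypothetical names, not part of the patch):

    using namespace ov::intel_cpu;
    StringMemory mem(eng, desc);        // allocates getElementsCount() std::string objects
    auto* strs = reinterpret_cast<StringMemory::OvString*>(mem.getData());
    strs[0] = "hello";                  // element-wise assignment; no byte-level memcpy
    mem.nullify();                      // resets every element to an empty string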
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index 8b3646a37d4..d8c0d9ffc55 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -56,7 +56,6 @@
 
 using namespace dnnl;
 using namespace InferenceEngine;
-using namespace InferenceEngine::details;
 
 namespace ov {
 namespace intel_cpu {
@@ -605,6 +604,43 @@ void Graph::AllocateWithReuse() {
                 erase = true;
                 break;
             }
+
+            // Special allocation for string tensors
+            if (edge->getDesc().getPrecision() == element::string && edge->getStatus() == Edge::Status::NeedAllocation) {
+                StringMemory::StringMemoryMngrPtr mngr;
+                if (edge->getParent()->isConstant()) {
+                    if (edge->getParent()->getType() == Type::Input) {
+                        auto constNode = static_cast<node::Input*>(edge->getParent().get());
+                        edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
+                    } else {
+                        edge->externalAllocate(context->getWeightsCache());
+                    }
+                    auto stringMemory = dynamic_cast<StringMemory*>(edge->getMemoryPtr().get());
+                    OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '",
+                            edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory.");
+                    mngr = stringMemory->getStringMemoryMngrPtr();
+                } else {
+                    auto memory = std::make_shared<StringMemory>(getEngine(), edge->getDesc());
+                    edge->reuse(memory);
+                    mngr = memory->getStringMemoryMngrPtr();
+                }
+                for (auto& edge_c : cluster) {
+                    if (edge_c == edge) {
+                        continue;
+                    }
+                    OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string.");
+                    if (edge_c->getStatus() == Edge::Status::NotAllocated) {
+                        auto memory = std::make_shared<StringMemory>(getEngine(), edge_c->getDesc(), mngr);
+                        edge_c->reuse(memory);
+                    } else {
+                        OPENVINO_THROW("[CPU] String tensor allocation in the cluster failed. Edge between nodes '", edge_c->getParent()->getName(), "' and '",
+                                edge_c->getChild()->getName(), "' has an unexpected status: ", static_cast<int>(edge_c->getStatus()));
+                    }
+                }
+                erase = true;
+                continue;
+            }
+
             // Special allocation for constants
             if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) {
                 continue;
@@ -904,7 +940,10 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& inp
     auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input);
     auto actualDesc = edgeMemory->getDescPtr();
 
-    if (!actualDesc->isCompatible(*ext_tensor_desc)) {
+    if (actualDesc->getPrecision() == element::string) {
+        StringMemory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr);
+        edgeMemory->load(ext_mem);
+    } else if (!actualDesc->isCompatible(*ext_tensor_desc)) {
         Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
         edgeMemory->load(ext_mem, false);
     } else {
@@ -983,7 +1022,10 @@ void Graph::PullOutputData(std::unordered_map<std::size_t, ov::SoPtr<ITensor>>&
         // That is the same memory. No need to copy
         if (ext_blob_ptr == intr_blob_ptr) continue;
 
-        if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
+        if (actualDesc->getPrecision() == element::string) {
+            StringMemory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr);
+            outBloMem.load(intr_blob);
+        } else if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
             Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false);
             outBloMem.load(intr_blob, false);
         } else {
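Note on the allocation strategy above: all edges of one reuse cluster share a single StringMemoryMngr, so in-place consumers alias the producer's std::string array instead of receiving byte offsets into a shared scratch blob (offset-based reuse cannot work for non-trivially-copyable elements). A sketch of the aliasing, with `engine`, `descA`, and `descB` assumed to exist:

    using namespace ov::intel_cpu;
    auto head  = std::make_shared<StringMemory>(engine, descA);   // owns the allocation
    auto alias = std::make_shared<StringMemory>(engine, descB, head->getStringMemoryMngrPtr());
    // head->getData() == alias->getData(): both views point at one std::string array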
StringMemory."); + mngr = stringMemory->getStringMemoryMngrPtr(); + } else { + auto memory = std::make_shared(getEngine(), edge->getDesc()); + edge->reuse(memory); + mngr = memory->getStringMemoryMngrPtr(); + } + for (auto& edge_c : cluster) { + if (edge_c == edge) { + continue; + } + OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string."); + if (edge_c->getStatus() == Edge::Status::NotAllocated) { + auto memory = std::make_shared(getEngine(), edge_c->getDesc(), mngr); + edge_c->reuse(memory); + } else { + OPENVINO_THROW("[CPU] String tensors allocation in the cluster. Edge between nodes '", edge_c->getParent()->getName(), "' and '", + edge_c->getChild()->getName(), "' has an unexpected status: ", static_cast(edge_c->getStatus())); + } + } + erase = true; + continue; + } + // Special allocation for constants if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) { continue; @@ -904,7 +940,10 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr& inp auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input); auto actualDesc = edgeMemory->getDescPtr(); - if (!actualDesc->isCompatible(*ext_tensor_desc)) { + if (actualDesc->getPrecision() == element::string) { + StringMemory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr); + edgeMemory->load(ext_mem); + } else if (!actualDesc->isCompatible(*ext_tensor_desc)) { Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false); edgeMemory->load(ext_mem, false); } else { @@ -983,7 +1022,10 @@ void Graph::PullOutputData(std::unordered_map>& // That is the same memory. No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; - if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) { + if (actualDesc->getPrecision() == element::string) { + StringMemory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr); + outBloMem.load(intr_blob); + } else if (!actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) { Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false); outBloMem.load(intr_blob, false); } else { diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index b3620b37623..5797d2991c8 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -26,6 +26,8 @@ #include "utils/cpu_utils.hpp" #include "utils/general_utils.h" +using OvString = ov::element_type_traits::value_type; + namespace ov { namespace intel_cpu { SyncInferRequest::SyncInferRequest(std::shared_ptr compiled_model) @@ -173,11 +175,17 @@ std::vector SyncInferRequest::get_profiling_info() const { } static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& tensor) { - auto size = tensor->get_byte_size(); auto& mem = edge->getMemory(); - auto memMngr = mem.getMemoryMngr(); - OPENVINO_ASSERT(memMngr); - memMngr->setExtBuff(tensor->data(), size); + + if (tensor->get_element_type() == element::string) { + auto memMngr = dynamic_cast(mem).getStringMemoryMngrPtr(); + OPENVINO_ASSERT(memMngr); + memMngr->setExtBuff(tensor->data(), tensor->get_size()); + } else { + auto memMngr = mem.getMemoryMngr(); + OPENVINO_ASSERT(memMngr); + memMngr->setExtBuff(tensor->data(), tensor->get_byte_size()); + } } void SyncInferRequest::change_default_ptr() { diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp index 58fcf70d494..6d65b51b93e 100644 --- 
diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
index 58fcf70d494..6d65b51b93e 100644
--- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
@@ -3,25 +3,19 @@
 //
 
 #include "cpu_convert.h"
+
 #include "cpu_memcpy.h"
-#include "openvino/core/parallel.hpp"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include "utils/bfloat16.hpp"
+
 #if defined(OPENVINO_ARCH_X86_64)
 #include "nodes/kernels/x64/jit_kernel.hpp"
-#include
+#else
+#include "cpu_memory.h"
+#include "openvino/core/type/element_type_traits.hpp"
+#include "selective_build.h"
+#include "utils/general_utils.h"
 #endif
 
-using namespace InferenceEngine;
-
-
 namespace ov {
 namespace intel_cpu {
 namespace {
@@ -583,7 +577,11 @@ void cpu_convert(const void *srcPtr,
     if (srcPrc == dstPrc && srcPrc == interimPrc) {
         const size_t L2_cache_size = dnnl::utils::get_cache_size(2, true);
         const size_t totalSize = size * dstPrc.size();
-        if (totalSize >= L2_cache_size) {
+        if (srcPrc == element::string) {
+            auto str_src = reinterpret_cast<const StringMemory::OvString *>(srcPtr);
+            auto str_dst = reinterpret_cast<StringMemory::OvString *>(dstPtr);
+            std::copy(str_src, str_src + size, str_dst);
+        } else if (totalSize >= L2_cache_size) {
            auto src = static_cast<const uint8_t *>(srcPtr);
            auto dst = static_cast<uint8_t *>(dstPtr);
            parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
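Note: for strings the only supported "conversion" is identity (srcPrc == dstPrc == interimPrc); the new branch deliberately bypasses both the cache-size-gated parallel path and cpu_memcpy, because each element must be copied via std::string assignment. A hypothetical call shape, assuming the existing two-precision cpu_convert overload:

    // Copies 4 std::string elements; an actual type change (e.g. string -> u8)
    // remains unsupported and would fail in the dispatch below this fast path.
    cpu_convert(src, dst, ov::element::string, ov::element::string, 4);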
diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp
index 86f4d47a97f..4fbf8ebb3a4 100644
--- a/src/plugins/intel_cpu/src/nodes/input.cpp
+++ b/src/plugins/intel_cpu/src/nodes/input.cpp
@@ -3,28 +3,12 @@
 //
 
 #include "input.h"
-#include "common/cpu_memcpy.h"
-#include
-#include
-#include
-#include
-#include
-#include
+#include "cpu/x64/jit_generator.hpp"
 #include "openvino/core/parallel.hpp"
-#include
-#include "caseless.hpp"
-#include "common/cpu_memcpy.h"
-#include "common/cpu_convert.h"
-#include "utils/cpu_utils.hpp"
-#include
-#include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "shape_inference/shape_inference_pass_through.hpp"
 
 using namespace dnnl;
-using namespace InferenceEngine;
-using namespace details;
-using namespace ov::op;
 using namespace dnnl::impl::cpu::x64;
 using namespace Xbyak;
 
@@ -234,11 +218,11 @@ jit_has_subnormals_base::fn_t jit_has_subnormals_function() {
 Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
     : Node(op, context, PassThroughShapeInferFactory()) {
     if (!one_of(op->get_type_info(),
-                v0::Parameter::get_type_info_static(),
-                v0::Constant::get_type_info_static(),
-                v0::Result::get_type_info_static(),
-                v3::ReadValue::get_type_info_static(),
-                v6::ReadValue::get_type_info_static()))
+                op::v0::Parameter::get_type_info_static(),
+                op::v0::Constant::get_type_info_static(),
+                op::v0::Result::get_type_info_static(),
+                op::v3::ReadValue::get_type_info_static(),
+                op::v6::ReadValue::get_type_info_static()))
         OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ",
                                        op->get_type_name(),
                                        " with name ",
@@ -246,7 +230,7 @@ Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr conte
 
     constant = ConstantType::NoConst;
 
-    constOp = ov::as_type_ptr<v0::Constant>(op);
+    constOp = ov::as_type_ptr<op::v0::Constant>(op);
     if (constOp) {
         constant = ConstantType::Const;
         cloneBlobIfRequired();
     }
 }
@@ -275,13 +259,29 @@ void Input::cloneBlobIfRequired() {
         // but ngraph Constant uses actual bitWidth for data storage allocation
         // in that case we make a copy to avoid overflow
         if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) {
-            memory = std::make_shared<Memory>(getEngine(), memDesc, constOp->get_data_ptr());
+            if (constOp->get_element_type() == element::string) {
+                memory = std::make_shared<StringMemory>(getEngine(), memDesc, constOp->get_data_ptr());
+            } else {
+                memory = std::make_shared<Memory>(getEngine(), memDesc, constOp->get_data_ptr());
+            }
         } else {
-            memory = std::make_shared<Memory>(getEngine(), memDesc);
-            memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size());
+            if (constOp->get_element_type() == element::string) {
+                memory = std::make_shared<StringMemory>(getEngine(), memDesc);
+                auto src = constOp->get_data_ptr<StringMemory::OvString>();
+                auto dst = reinterpret_cast<StringMemory::OvString *>(memory->getData());
+                std::copy(src, src + size, dst);
+            } else {
+                memory = std::make_shared<Memory>(getEngine(), memDesc);
+                memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size());
+            }
         }
 
-        MemoryPtr ptr = std::make_shared<StaticMemory>(getEngine(), memDesc);
+        MemoryPtr ptr;
+        if (memDesc.getPrecision() == element::string) {
+            ptr = std::make_shared<StringMemory>(getEngine(), memDesc);
+        } else {
+            ptr = std::make_shared<StaticMemory>(getEngine(), memDesc);
+        }
         ptr->load(*memory.get(), needFlushDenormalsToZero);
 
         return ptr;
@@ -381,7 +381,7 @@ void Input::cloneBlobIfRequired() {
         memoryPtr = std::const_pointer_cast<const IMemory>(ptr);
     // IRs already have all subnormals flushed to zero, but in
     // read_model scenario with directly loaded original model still can have subnormals
-    } else if (isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) {
+    } else if (prec != element::string && isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) {
         memoryPtr = std::make_shared<StaticMemory>(getEngine(), memDesc, constOp->get_data_ptr());
     } else {
         memoryPtr = std::const_pointer_cast<const IMemory>(cloneBlob());
    }
@@ -420,14 +420,14 @@ MemoryCPtr Input::getMemoryPtr() const {
 
 void Input::getSupportedDescriptors() {
     if (getType() == Type::Input) {
         if (!getParentEdges().empty())
-            OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
+            THROW_CPU_NODE_ERR("has incorrect number of input edges.");
         if (getChildEdges().empty())
-            OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
+            THROW_CPU_NODE_ERR("has incorrect number of output edges.");
     } else if (getType() == Type::Output) {
         if (getParentEdges().size() != 1)
-            OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
+            THROW_CPU_NODE_ERR("has incorrect number of input edges.");
         if (!getChildEdges().empty())
-            OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
+            THROW_CPU_NODE_ERR("has incorrect number of output edges.");
     }
 }
@@ -446,19 +446,19 @@ void Input::createPrimitive() {
     for (size_t i = 0; i < getChildEdges().size(); i++) {
         auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
         if (!dstMemPtr || !dstMemPtr->isAllocated())
-            OPENVINO_THROW("Destination memory didn't allocate for node ", getName()
-                    , " to node ", getChildEdgeAt(i)->getChild()->getName(), ".");
+            THROW_CPU_NODE_ERR("has unallocated memory object at port ", i,
+                    " to node ", getChildEdgeAt(i)->getChild()->getName(), ".");
     }
     for (size_t i = 0; i < getParentEdges().size(); i++) {
         auto srcMemPtr = getParentEdgeAt(i)->getMemoryPtr();
         if (!srcMemPtr || !srcMemPtr->isAllocated())
-            OPENVINO_THROW("Destination memory didn't allocate for node ", getName()
-                    , " from node ", getParentEdgeAt(i)->getParent()->getName(), ".");
+            THROW_CPU_NODE_ERR("has unallocated memory object at port ", i,
+                    " from node ", getParentEdgeAt(i)->getParent()->getName(), ".");
     }
 
     const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
     if (selected_pd == nullptr)
-        OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), ".");
+        THROW_CPU_NODE_ERR("doesn't have selected primitive descriptor.");
 }
 
 bool Input::created() const {
diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h
index f8e5a4f1947..58daadccaff 100644
--- a/src/plugins/intel_cpu/src/nodes/input.h
+++ b/src/plugins/intel_cpu/src/nodes/input.h
@@ -4,10 +4,8 @@
 
 #pragma once
 
-#include
 #include
 #include
-#include
 
 namespace ov {
 namespace intel_cpu {
diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp
index b7774985dac..327c54f9d55 100644
--- a/src/plugins/intel_cpu/src/nodes/reference.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reference.cpp
@@ -3,13 +3,7 @@
 //
 
 #include "reference.h"
-
 #include "common/cpu_memcpy.h"
-#include
-#include "openvino/core/shape_util.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
 
 namespace ov {
 namespace intel_cpu {
@@ -94,7 +88,13 @@ void Reference::executeDynamicImpl(dnnl::stream strm) {
             if (memory->getSize() != tensor.get_byte_size()) {
                 THROW_CPU_NODE_ERR("output tensor data size mismatch occurred during the inference on output port number ", i);
             }
-            cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size());
+            if (tensor.get_element_type() == element::string) {
+                auto srcPtr = tensor.data<StringMemory::OvString>();
+                auto dstPtr = reinterpret_cast<StringMemory::OvString *>(memory->getData());
+                std::copy(srcPtr, srcPtr + tensor.get_size(), dstPtr);
+            } else {
+                cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size());
+            }
         }
     }
 }
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp
index 8cf3ecee2fa..41b6509f70c 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp
@@ -3,24 +3,14 @@
 //
 
 #include "reorder.h"
-#include
-#include
-#include
-#include
-#include
-#include "openvino/core/parallel.hpp"
-#include "utils/general_utils.h"
-#include
-#include "nodes/common/cpu_memcpy.h"
-#include "nodes/common/cpu_convert.h"
-#include "nodes/common/reorder_prim.h"
-#include "convert.h"
-#include
-#include
-#include "executors/transpose_list.hpp"
 
-using namespace dnnl;
-using namespace InferenceEngine;
+#include "convert.h"
+#include "cpu/x64/cpu_isa_traits.hpp"
+#include "nodes/common/cpu_convert.h"
+#include "nodes/common/cpu_memcpy.h"
+#include "nodes/common/reorder_prim.h"
+#include "openvino/core/parallel.hpp"
+#include "shape_inference/shape_inference_pass_through.hpp"
 
 namespace ov {
 namespace intel_cpu {
@@ -32,7 +22,7 @@ bool Reorder::isExecutable() const {
 
 Reorder::Reorder(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context) :
         Node(op, context, PassThroughShapeInferFactory()) {
-    OPENVINO_THROW("Can't create reorder node from ngraph node");
+    THROW_CPU_NODE_ERR("could not create CPU node from Core node.");
 }
 
 Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) :
@@ -40,9 +30,9 @@ Reorder::Reorder(const std::string& name, const GraphContext::CPtr context) :
 
 void Reorder::getSupportedDescriptors() {
     if (getParentEdges().size() != 1)
-        OPENVINO_THROW("Incorrect number of input edges for layer ", getName());
+        THROW_CPU_NODE_ERR("has incorrect number of input edges.");
     if (getChildEdges().empty())
-        OPENVINO_THROW("Incorrect number of output edges for layer ", getName());
+        THROW_CPU_NODE_ERR("has incorrect number of output edges.");
 }
 
 void Reorder::initSupportedPrimitiveDescriptors() {
@@ -71,7 +61,7 @@ void Reorder::initSupportedPrimitiveDescriptors() {
         config.inConfs[0].setMemDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].getMemDesc());
         config.outConfs[0].setMemDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc());
     } else {
-        OPENVINO_THROW("Cannot initialize supported PDs for Reorder node with name `", getName(), "`");
+        THROW_CPU_NODE_ERR("could not initialize supported PDs.");
     }
 
     supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder);
@@ -83,9 +73,7 @@ void Reorder::initSupportedPrimitiveDescriptors() {
     }
     if (isDynamic && (config.inConfs[0].getMemDesc()->getShape().getRank() != config.outConfs[0].getMemDesc()->getShape().getRank()))
-        OPENVINO_THROW("Reorder node with name: ",
-                       getName(),
-                       " doesn't support case when input and output shapes have different rank and dynamic");
+        THROW_CPU_NODE_ERR("doesn't support case when input and output shapes have different rank and dynamic.");
     if (!isOptimized) {
         const auto &inShape = getInputShapeAtPort(0);
         if (one_of(inShape.getRank(), 4u, 5u) &&
            config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) &&
            config.outConfs[0].getMemDesc()->hasLayoutType(LayoutType::ncsp) &&
            config.inConfs[0].getMemDesc()->getPrecision() == ov::element::f32 &&
            config.outConfs[0].getMemDesc()->getPrecision() == ov::element::f32) {
            // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation
            isNspc2NcspCase = true;
-        } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) &&
+        } else if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) &&
                   one_of(inShape.getRank(), 4u, 5u) &&
                   config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::ncsp) &&
                   config.outConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) &&
@@ -180,11 +168,11 @@ void Reorder::prepareParams() {
     auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
     auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
     if (!dstMemPtr || !dstMemPtr->isAllocated())
-        OPENVINO_THROW("Destination memory didn't allocate.");
+        THROW_CPU_NODE_ERR("has unallocated destination memory object.");
     if (!srcMemPtr || !srcMemPtr->isAllocated())
-        OPENVINO_THROW("Input memory didn't allocate.");
+        THROW_CPU_NODE_ERR("has unallocated input memory object.");
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        OPENVINO_THROW("Preferable primitive descriptor is not set.");
+        THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
 
     auto isSupportedDesc = [](const MemoryDesc& desc) {
         if (!desc.isDefined()) {
@@ -239,11 +227,11 @@ void Reorder::prepareParams() {
         }
         if (!canUseNcsp2Nspc && !canUseNspc2Ncsp) {
             if (!dstMemPtr || !dstMemPtr->isAllocated())
-                OPENVINO_THROW("Destination memory didn't allocate.");
+                THROW_CPU_NODE_ERR("has unallocated destination memory object.");
             if (!srcMemPtr || !srcMemPtr->isAllocated())
-                OPENVINO_THROW("Input memory didn't allocate.");
+                THROW_CPU_NODE_ERR("has unallocated input memory object.");
             if (getSelectedPrimitiveDescriptor() == nullptr)
-                OPENVINO_THROW("Preferable primitive descriptor is not set.");
+                THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
 
             createReorderPrimitive(srcMemPtr->getDescWithType<DnnlMemoryDesc>()->getDnnlDesc(), srcMemPtr->getData(),
                                    dstMemPtr->getDescWithType<DnnlMemoryDesc>()->getDnnlDesc(), dstMemPtr->getData());
@@ -256,7 +244,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc,
                                      void* dstPtr) {
     auto selectedPD = getSelectedPrimitiveDescriptor();
     if (!selectedPD)
-        OPENVINO_THROW("Preferable primitive descriptor is not set.");
+        THROW_CPU_NODE_ERR("does not have preferable primitive descriptor.");
 
     const auto engine = getEngine();
     src_blocked = std::make_shared<Memory>(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false);
@@ -301,7 +289,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc,
     auto result = getReorderPrim(context->getParamsCache(), getEngine(), src_desc, dst_desc);
     if (!result) {
-        OPENVINO_THROW("Cannot create reorder primitive: unsupported reorder case");
+        THROW_CPU_NODE_ERR("could not create reorder primitive: unsupported reorder case.");
     }
     prim = result;
@@ -419,7 +407,7 @@ void Reorder::execute(dnnl::stream strm) {
         if (prim) {
             prim.execute(strm, primArgs);
         } else {
-            OPENVINO_THROW("Reorder node with name ", getName(), " doesn't have an initialized primitive");
+            THROW_CPU_NODE_ERR("doesn't have an initialized primitive.");
         }
     }
 }
@@ -448,11 +436,17 @@ void Reorder::reorderData(const IMemory &input, const IMemory &output, MultiCach
     }
 
     if (input.getDesc().isCompatible(output.getDesc())) {
-        auto srcPtr = static_cast<uint8_t *>(input.getData());
-        auto dstPtr = static_cast<uint8_t *>(output.getData());
+        if (input.getDesc().getPrecision() == element::string) {
+            auto srcPtr = reinterpret_cast<StringMemory::OvString *>(input.getData());
+            auto dstPtr = reinterpret_cast<StringMemory::OvString *>(output.getData());
+            std::copy(srcPtr, srcPtr + output.getShape().getElementsCount(), dstPtr);
+        } else {
+            auto srcPtr = static_cast<uint8_t *>(input.getData());
+            auto dstPtr = static_cast<uint8_t *>(output.getData());
 
-        auto copySize = output.getSize();
-        cpu_memcpy(dstPtr, srcPtr, copySize);
+            auto copySize = output.getSize();
+            cpu_memcpy(dstPtr, srcPtr, copySize);
+        }
     } else {
         dnnl::reorder reorder;
         std::vector<uint8_t> tmpBuff;
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h
index 1222549ac31..ffe43f0b56c 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.h
+++ b/src/plugins/intel_cpu/src/nodes/reorder.h
@@ -4,13 +4,11 @@
 
 #pragma once
 
-#include
 #include
-#include
-#include
-#include
+
+#if defined(OV_CPU_ARM_ENABLE_FP16)
 #include "nodes/executors/transpose.hpp"
-#include
+#endif
 
 namespace ov {
 namespace intel_cpu {
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
index 394db23ef58..779b6ef9fa2 100644
--- a/src/plugins/intel_cpu/src/plugin.cpp
+++ b/src/plugins/intel_cpu/src/plugin.cpp
@@ -533,7 +533,8 @@ Engine::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::A
                                                            ov::element::Type_t::f16,
                                                            ov::element::Type_t::f32,
                                                            ov::element::Type_t::f64,
-                                                           ov::element::Type_t::boolean};
+                                                           ov::element::Type_t::boolean,
+                                                           ov::element::Type_t::string};
 
     if (!supported_precisions.count(input_precision)) {
         OPENVINO_THROW_NOT_IMPLEMENTED("CPU plugin: Input image format ",
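Note: with string added to supported_precisions, compile_model on CPU no longer rejects string-typed inputs. A hedged smoke check (a trivial hypothetical model, not taken from the patch):

    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::string, ov::PartialShape{-1});
    auto res = std::make_shared<ov::op::v0::Result>(param);
    auto model = std::make_shared<ov::Model>(ov::ResultVector{res}, ov::ParameterVector{param});
    ov::Core core;
    auto compiled = core.compile_model(model, "CPU");  // previously threw NOT_IMPLEMENTED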
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_string.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_string.cpp
new file mode 100644
index 00000000000..34e92e3e59b
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_string.cpp
@@ -0,0 +1,248 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//               -----------------            ----------------
+//               |   Parameter   |            |   Constant   |
+//               -----------------            ----------------
+//                 |     | string        string |         |
+//     -----------    -----------------------     -----------
+//     | Extension |   |      Extension      |    | Extension |
+//     -----------    -----------------------     -----------
+//       | u8          | string    | string         | u8
+//     --------     -----------   --------        --------
+//     | Result |   | Extension |  | Result |      | Result |
+//     --------     -----------   --------        --------
+//                    | u8
+//               ---------------
+//               | Bitwise (CPU) |
+//               ---------------
+//                    | u8
+//                 --------
+//                 | Result |
+//                 --------
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+
+using namespace CPUTestUtils;
+using namespace ov::test;
+
+namespace SubgraphTestsDefinitions {
+
+using CustomOpStringCPUTestParams = std::tuple<ElementType, InputShape>;
+
+class CustomOpStringString : public ov::op::Op {
+public:
+    OPENVINO_OP("CustomOpStringString");
+
+    CustomOpStringString() = default;
+    CustomOpStringString(const ov::OutputVector& args) : Op(args) {
+        constructor_validate_and_infer_types();
+    }
+
+    void validate_and_infer_types() override {
+        const auto& inputs_count = input_values().size();
+        OPENVINO_ASSERT(inputs_count == 2, "Input count must be 2, Got: ", inputs_count);
+        OPENVINO_ASSERT(get_input_element_type(0) == ov::element::Type_t::string, "The input must be string.");
+        OPENVINO_ASSERT(get_input_element_type(1) == ov::element::Type_t::string, "The input must be string.");
+
+        set_output_size(2);
+        set_output_type(0, ov::element::Type_t::string, get_input_partial_shape(0));
+        set_output_type(1, ov::element::Type_t::string, get_input_partial_shape(1));
+    }
+
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
+        OPENVINO_ASSERT(new_args.size() == 2, "Incorrect number of new arguments: ", new_args.size(), ". 2 is expected.");
+        return std::make_shared<CustomOpStringString>(new_args);
+    }
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override { return true; }
+
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
+        for (size_t i = 0lu; i < inputs.size(); i++) {
+            OPENVINO_ASSERT(inputs[i].get_shape().size() == static_cast<size_t>(get_input_partial_shape(i).rank().get_length()),
+                "Invalid input shape rank: ", inputs[i].get_shape().size());
+        }
+        for (size_t i = 0lu; i < outputs.size(); i++) {
+            OPENVINO_ASSERT(outputs[i].get_shape().size() == static_cast<size_t>(get_output_partial_shape(i).rank().get_length()),
+                "Invalid outputs shape rank: ", outputs[i].get_shape().size());
+        }
+
+        auto in_data_0 = inputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
+        auto in_data_1 = inputs[1].data<ov::element_type_traits<ov::element::string>::value_type>();
+        auto out_data_0 = outputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
+        auto out_data_1 = outputs[1].data<ov::element_type_traits<ov::element::string>::value_type>();
+
+        const auto el_num_0 = outputs[0].get_size();
+        for (size_t i = 0lu; i < el_num_0; i++) {
+            out_data_0[i] = in_data_0[i];
+        }
+
+        const auto el_num_1 = outputs[1].get_size();
+        for (size_t i = 0lu; i < el_num_1; i++) {
+            out_data_1[i] = in_data_1[i];
+        }
+
+        return true;
+    }
+
+    bool evaluate(ov::TensorVector& output_values,
+                  const ov::TensorVector& input_values,
+                  const ov::EvaluationContext& evaluationContext) const override {
+        return evaluate(output_values, input_values);
+    }
+
+    bool has_evaluate() const override { return true; }
+};
+
+class CustomOpStringU8 : public ov::op::Op {
+public:
+    OPENVINO_OP("CustomOpStringU8");
+
+    CustomOpStringU8() = default;
+    CustomOpStringU8(const ov::OutputVector& args) : Op(args) {
+        constructor_validate_and_infer_types();
+    }
+
+    void validate_and_infer_types() override {
+        const auto& inputs_count = input_values().size();
+        OPENVINO_ASSERT(inputs_count == 1, "Input count must be 1, Got: ", inputs_count);
+        OPENVINO_ASSERT(get_input_element_type(0) == ov::element::Type_t::string, "The input must be string.");
+
+        set_output_size(1);
+        set_output_type(0, ov::element::Type_t::u8, get_input_partial_shape(0));
+    }
+
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
+        OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments: ", new_args.size(), ". 1 is expected.");
+        return std::make_shared<CustomOpStringU8>(new_args);
+    }
+
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
+        for (size_t i = 0lu; i < inputs.size(); i++) {
+            OPENVINO_ASSERT(inputs[i].get_shape().size() == static_cast<size_t>(get_input_partial_shape(i).rank().get_length()),
+                "Invalid input shape rank: ", inputs[i].get_shape().size());
+        }
+        for (size_t i = 0lu; i < outputs.size(); i++) {
+            OPENVINO_ASSERT(outputs[i].get_shape().size() == static_cast<size_t>(get_output_partial_shape(i).rank().get_length()),
+                "Invalid outputs shape rank: ", outputs[i].get_shape().size());
+        }
+
+        auto in_data_0 = inputs[0].data<ov::element_type_traits<ov::element::string>::value_type>();
+        auto out_data_0 = outputs[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
+
+        const auto el_num_0 = outputs[0].get_size();
+        for (size_t i = 0lu; i < el_num_0; i++) {
+            if (in_data_0[i].empty()) {
+                out_data_0[i] = '_';
+            } else {
+                out_data_0[i] = in_data_0[i][0];
+            }
+        }
+
+        return true;
+    }
+
+    bool has_evaluate() const override { return true; }
+    bool visit_attributes(ov::AttributeVisitor& visitor) override { return true; }
+};
+
+class CustomOpStringCPUTest : public testing::WithParamInterface<CustomOpStringCPUTestParams>,
+                              virtual public SubgraphBaseTest,
+                              public CPUTestsBase {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<CustomOpStringCPUTestParams>& obj) {
+        ElementType in_type;
+        InputShape inputShape;
+        std::tie(in_type, inputShape) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=" << inputShape << "_";
+        result << "Prc=" << in_type;
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        targetDevice = utils::DEVICE_CPU;
+
+        ElementType in_type;
+        InputShape inputShape;
+        std::tie(in_type, inputShape) = this->GetParam();
+
+        init_input_shapes({inputShape});
+
+        auto in_0 = std::make_shared<ov::op::v0::Parameter>(in_type, inputDynamicShapes[0]);
+        auto in_1 = std::make_shared<ov::op::v0::Constant>(utils::create_and_fill_tensor(in_type, { 1, 3, 5 }));
+        ov::OutputVector param_outs_0({ in_0, in_1 });
+        auto str_str_op = std::make_shared<CustomOpStringString>(param_outs_0);
+        ov::OutputVector param_outs_1({ str_str_op->output(0) });
+        auto str_u8_op_0 = std::make_shared<CustomOpStringU8>(param_outs_1);
+        ov::OutputVector param_outs_2({ in_0 });
+        auto str_u8_op_1 = std::make_shared<CustomOpStringU8>(param_outs_2);
+        ov::OutputVector param_outs_3({ in_1 });
+        auto str_u8_op_2 = std::make_shared<CustomOpStringU8>(param_outs_3);
+        auto btw_not_op = std::make_shared<ov::op::v13::BitwiseNot>(str_u8_op_0->output(0));
+
+        ov::ParameterVector input_params{in_0};
+        ov::ResultVector results{std::make_shared<ov::op::v0::Result>(btw_not_op->output(0)),
+                                 std::make_shared<ov::op::v0::Result>(str_str_op->output(1)),
+                                 std::make_shared<ov::op::v0::Result>(str_u8_op_1->output(0)),
+                                 std::make_shared<ov::op::v0::Result>(str_u8_op_2->output(0))};
+        function = std::make_shared<ov::Model>(results, input_params, "CustomOpStringString");
+    }
+
+    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& funcInputs = function->inputs();
+        for (size_t i = 0lu; i < funcInputs.size(); ++i) {
+            const auto& funcInput = funcInputs[i];
+            auto tensor = utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+
+            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+        }
+    }
+
+    void compare(const std::vector<ov::Tensor>& expected, const std::vector<ov::Tensor>& actual) override {
+        ASSERT_EQ(expected.size(), actual.size());
+        ASSERT_EQ(expected.size(), function->get_results().size());
+
+        auto expected_data_0 = expected[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
+        auto actual_data_0 = actual[0].data<ov::element_type_traits<ov::element::u8>::value_type>();
+        const auto size_0 = expected[0].get_size();
+
+        for (size_t i = 0lu; i < size_0; i++) {
+            OPENVINO_ASSERT(expected_data_0[i] == actual_data_0[i], "At index ", i,
+                " expected: '", expected_data_0[i], "' actual: '", actual_data_0[i], "'");
+        }
+
+        auto expected_data_1 = expected[1].data<ov::element_type_traits<ov::element::string>::value_type>();
+        auto actual_data_1 = actual[1].data<ov::element_type_traits<ov::element::string>::value_type>();
+        const auto size_1 = expected[1].get_size();
+
+        for (size_t i = 0lu; i < size_1; i++) {
+            OPENVINO_ASSERT(expected_data_1[i] == actual_data_1[i], "At index ", i,
+                " expected: '", expected_data_1[i], "' actual: '", actual_data_1[i], "'");
+        }
+    }
+};
+
+TEST_P(CustomOpStringCPUTest, CompareWithRefs) {
+    run();
+}
+
+const std::vector<InputShape> inputShapes = {
+    {{}, {{2, 5}}},
+    {{}, {{17, 9}}},
+    {{-1, -1}, {{1, 3}, {5, 17}, {99, 51}}},
+    {{}, {{}}}
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CustomOp,
+                         CustomOpStringCPUTest,
+                         ::testing::Combine(::testing::Values(ElementType::string), ::testing::ValuesIn(inputShapes)),
+                         CustomOpStringCPUTest::getTestCaseName);
+
+}  // namespace SubgraphTestsDefinitions
diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp
index 3b876a530cd..cf0e0ebe299 100644
--- a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp
+++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp
@@ -465,6 +465,12 @@ void inline fill_data_random(InferenceEngine::
 }
 OPENVINO_SUPPRESS_DEPRECATED_END
 
+void fill_random_string(std::string* dst,
+                        const size_t size,
+                        const size_t len_range = 10lu,
+                        const size_t start_from = 0lu,
+                        const int seed = 1);
+
 template <typename T>
 typename std::enable_if<std::is_signed<T>::value, T>::type inline ie_abs(const T& val) {
     return std::abs(val);
diff --git a/src/tests/test_utils/common_test_utils/src/data_utils.cpp b/src/tests/test_utils/common_test_utils/src/data_utils.cpp
index ee1eb532c87..7f9d15306fa 100644
--- a/src/tests/test_utils/common_test_utils/src/data_utils.cpp
+++ b/src/tests/test_utils/common_test_utils/src/data_utils.cpp
@@ -578,6 +578,27 @@ void fill_tensor_random(ov::Tensor& tensor,
 #undef CASE_FLOAT
 }
 
+void fill_random_string(std::string* dst,
+                        const size_t size,
+                        const size_t len_range,
+                        const size_t start_from,
+                        const int seed) {
+    static const int32_t char_range = 128;
+    testing::internal::Random random_len(seed);
+    random_len.Generate(len_range);
+    testing::internal::Random random_char(seed);
+    random_char.Generate(char_range);
+
+    for (size_t i = 0lu; i < size; i++) {
+        const auto len = start_from + static_cast<size_t>(random_len.Generate(len_range));
+        auto& str = dst[i];
+        str.resize(len);
+        for (size_t j = 0lu; j < len; j++) {
+            str[j] = static_cast<char>(random_char.Generate(char_range));
+        }
+    }
+}
+
 }  // namespace utils
 }  // namespace test
 }  // namespace ov
diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp
index b39a53bb296..8f8a503af9d 100644
--- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp
+++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp
@@ -51,6 +51,13 @@ ov::Tensor create_and_fill_tensor(const ov::element::Type element_type,
                          inGenData.resolution,
                          inGenData.seed);
         break;
+    case ov::element::Type_t::string:
+        fill_random_string(static_cast<std::string*>(tensor.data()),
+                           tensor.get_size(),
+                           inGenData.range,
+                           inGenData.start_from,
+                           inGenData.seed);
+        break;
     default:
         OPENVINO_THROW("Unsupported element type: ", element_type);
     }
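Usage sketch for the new test helper (declared in data_utils.hpp above; note the inner loop draws characters from the dedicated random_char generator, which is the intent of constructing two generators):

    #include <string>
    #include <vector>
    #include "common_test_utils/data_utils.hpp"

    std::vector<std::string> strings(8);
    ov::test::utils::fill_random_string(strings.data(), strings.size(),
                                        /*len_range=*/10lu, /*start_from=*/0lu, /*seed=*/1);
    // Each element now holds a pseudo-random ASCII string of length in [0, 10).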