[CPU] Zero-copy optimizations for model outputs (#18476)

- Implement zero-copy output between the plugin graph and the infer request, eliminating memory-copy overhead and improving performance
- Implement a double buffer for InferRequest outputs (see the sketch below)
Author: cecilia peng
Date: 2023-07-25 14:33:48 +08:00
Committed by: GitHub
parent fdfafbb7d2
commit 7fbd3a7ebf
25 changed files with 1239 additions and 95 deletions
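
At a high level, each graph output edge is now backed by a ProxyMemoryMngr, so the infer request can redirect the graph's output storage to a user-visible buffer, and PullOutputData can skip the copy when both sides alias the same pointer. Below is a minimal, self-contained toy model of that control flow; all types here (IMngr, HeapMngr, ProxyMngr) are simplified stand-ins invented for illustration, not the plugin's real classes.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

struct IMngr {                        // stand-in for IMemoryMngr
    virtual ~IMngr() = default;
    virtual void* ptr() = 0;
    virtual void resize(size_t n) = 0;
};

struct HeapMngr : IMngr {             // stand-in for MemoryMngrWithReuse
    std::vector<char> buf;
    void* ptr() override { return buf.data(); }
    void resize(size_t n) override { if (n > buf.size()) buf.resize(n); }
};

struct ProxyMngr : IMngr {            // stand-in for ProxyMemoryMngr
    std::shared_ptr<IMngr> target = std::make_shared<HeapMngr>();
    size_t size = 0;
    void* ptr() override { return target->ptr(); }
    void resize(size_t n) override { size = n; target->resize(n); }
    void redirect(std::shared_ptr<IMngr> t) {   // like setMemMngr()
        target = std::move(t);
        target->resize(size);   // keep capacity even though the shape did not change
    }
};

int main() {
    ProxyMngr graph_output;                           // graph writes through this proxy
    graph_output.resize(64);
    auto user_buffer = std::make_shared<HeapMngr>();  // buffer handed out to the user
    graph_output.redirect(user_buffer);               // inject: graph now writes in place
    // PullOutputData-style check: same pointer means no copy is needed.
    assert(graph_output.ptr() == user_buffer->ptr());
    std::cout << "zero-copy: " << (graph_output.ptr() == user_buffer->ptr()) << "\n";
}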

View File

@@ -105,7 +105,7 @@ void CommonOptimizations::SplitDimensionM(const std::shared_ptr<ov::snippets::op
const auto needed_new_dim = m_dim / batch_dim_multiplier; // m / (LCM(b, nthrs) / b) - needed factors of dimension m
auto is_optimized = [&](size_t batch_m_dim, size_t new_m_dim) {
return batch_m_dim != 1 && new_m_dim >= optimal_m_dim;
return batch_m_dim != 1 && new_m_dim >= static_cast<size_t>(optimal_m_dim);
};
if (batch_dim_multiplier * needed_new_dim == m_dim) {

View File

@@ -71,9 +71,9 @@ ov::SoPtr<ITensor> make_tensor(const std::shared_ptr<InferenceEngine::Blob>& ten
const InferenceEngine::Blob* get_hardware_blob(const InferenceEngine::Blob* blob);
InferenceEngine::Blob* get_hardware_blob(InferenceEngine::Blob* blob);
std::shared_ptr<InferenceEngine::Blob> tensor_to_blob(const ov::SoPtr<ITensor>& tensor,
bool unwrap = true,
InferenceEngine::TensorDesc desc = {});
OPENVINO_RUNTIME_API std::shared_ptr<InferenceEngine::Blob> tensor_to_blob(const ov::SoPtr<ITensor>& tensor,
bool unwrap = true,
InferenceEngine::TensorDesc desc = {});
/** @endcond */
IE_SUPPRESS_DEPRECATED_END

View File

@@ -260,8 +260,9 @@ void ov::ISyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
" expecting ",
port.get_shape(),
".");
OPENVINO_ASSERT(std::dynamic_pointer_cast<ov::IRemoteTensor>(tensor._ptr) || tensor->data() != nullptr,
"Tensor data equal nullptr!");
OPENVINO_ASSERT(
std::dynamic_pointer_cast<ov::IRemoteTensor>(tensor._ptr) || tensor->data() != nullptr || is_dynamic,
"Tensor data equal nullptr!");
}
void ov::ISyncInferRequest::allocate_tensor(

View File

@@ -298,7 +298,9 @@ BlockingDesc::BlockingDesc(const SizeVector& blocked_dims,
this->offsetPaddingToData = dimOffsets;
// check that strides are valid
{
if (!std::any_of(blocked_dims.begin(), blocked_dims.end(), [](const size_t dim) {
return dim == 0ul;
})) {
size_t denseStride = 1;
for (size_t i = 1; i <= strides.size(); i++) {

View File

@@ -32,6 +32,7 @@ namespace ov {
namespace intel_cpu {
class Memory;
class ProxyMemoryMngr;
/**
* @interface IMemoryMngr
@@ -313,6 +314,7 @@ public:
private:
friend DnnlMemoryMngr;
friend ProxyMemoryMngr;
private:
void update();

View File

@@ -0,0 +1,98 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_tensor.h"
#include "ie_ngraph_utils.hpp"
#include "utils/debug_capabilities.h"
namespace ov {
namespace intel_cpu {
Tensor::Tensor(MemoryPtr memptr) : m_memptr{memptr} {
OPENVINO_ASSERT(m_memptr != nullptr);
// Only the plain data format (ncsp) is supported.
auto memdesc = m_memptr->getDescPtr();
OPENVINO_ASSERT(memdesc->hasLayoutType(LayoutType::ncsp), "intel_cpu::Tensor only supports memory with ncsp layout.");
m_element_type = InferenceEngine::details::convertPrecision(memdesc->getPrecision());
}
void Tensor::set_shape(ov::Shape new_shape) {
const auto& shape = m_memptr->getDescPtr()->getShape();
if (shape.isStatic()) {
DEBUG_LOG("tensor's memory object ", m_memptr.get(), ", ", vec2str(shape.getStaticDims()), " -> ", new_shape.to_string());
if (shape.getStaticDims() == new_shape) return;
}
auto desc = m_memptr->getDescPtr();
const auto newDesc = desc->cloneWithNewDims(new_shape, true);
m_memptr->redefineDesc(newDesc);
}
const ov::element::Type& Tensor::get_element_type() const {
return m_element_type;
}
const ov::Shape& Tensor::get_shape() const {
auto& shape = m_memptr->getDescPtr()->getShape();
OPENVINO_ASSERT(shape.isStatic(), "intel_cpu::Tensor has dynamic shape.");
std::lock_guard<std::mutex> guard(m_lock);
m_shape = ov::Shape{shape.getStaticDims()};
return m_shape;
}
size_t Tensor::get_size() const {
auto& desc = m_memptr->getDesc();
return desc.getShape().getElementsCount();
}
size_t Tensor::get_byte_size() const {
auto& desc = m_memptr->getDesc();
return desc.getCurrentMemSize();
}
const ov::Strides& Tensor::get_strides() const {
OPENVINO_ASSERT(m_memptr->getDescPtr()->isDefined(), "intel_cpu::Tensor requires memory with defined strides.");
std::lock_guard<std::mutex> guard(m_lock);
update_strides();
return m_strides;
}
void Tensor::update_strides() const {
auto blocked_desc = m_memptr->getDescWithType<BlockedMemoryDesc>();
OPENVINO_ASSERT(blocked_desc, "not a valid blocked memory descriptor.");
auto& strides = blocked_desc->getStrides();
m_strides.resize(strides.size());
std::transform(strides.cbegin(), strides.cend(), m_strides.begin(),
std::bind1st(std::multiplies<size_t>(), m_element_type.size()));
}
void* Tensor::data(const element::Type& element_type) const {
if (element_type != element::undefined && element_type != element::dynamic) {
OPENVINO_ASSERT(element_type == get_element_type(),
"Tensor data with element type ",
get_element_type(),
", is not representable as pointer to ",
element_type);
}
return m_memptr->getData();
}
/**
* @brief Creates tensor on graph memory
*
* @param mem Memory object
*
* @return Shared pointer to tensor interface
*/
std::shared_ptr<ITensor> make_tensor(MemoryPtr mem) {
return std::make_shared<Tensor>(mem);
}
} // namespace intel_cpu
} // namespace ov
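
As a usage note: the wrapper is constructed over a plain-layout (ncsp) Memory object; the Tensor constructor above rejects anything else. A short fragment, modeled on the create_memory() helper from the tests added later in this commit, assuming the plugin-internal headers it uses:

#include "cpu_memory.h"
#include "cpu_tensor.h"

using namespace ov::intel_cpu;

std::shared_ptr<ov::ITensor> make_f32_tensor() {
    dnnl::engine eng(dnnl::engine::kind::cpu, 0);
    auto desc = std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::FP32,
                                                       Shape{4, 3, 2});
    auto mem = std::make_shared<Memory>(eng, desc);
    return make_tensor(mem);   // plain ncsp layout, as required by the Tensor ctor
}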

View File

@@ -0,0 +1,48 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/runtime/itensor.hpp"
#include "cpu_memory.h"
namespace ov {
namespace intel_cpu {
class Tensor : public ITensor {
public:
// Only plain data format is supported.
explicit Tensor(MemoryPtr memptr);
void set_shape(ov::Shape shape) override;
const ov::element::Type& get_element_type() const override;
const ov::Shape& get_shape() const override;
size_t get_size() const override;
size_t get_byte_size() const override;
const ov::Strides& get_strides() const override;
void* data(const element::Type& type = {}) const override;
MemoryPtr get_memory() {return m_memptr;}
private:
void update_strides() const;
MemoryPtr m_memptr;
ov::element::Type m_element_type;
mutable ov::Shape m_shape;
mutable ov::Strides m_strides;
mutable std::mutex m_lock;
};
std::shared_ptr<ITensor> make_tensor(MemoryPtr mem);
} // namespace intel_cpu
} // namespace ov

View File

@@ -515,6 +515,13 @@ EdgePtr Edge::getBaseEdge(int look) {
if (edge->inPlace() && edge != edgesForSamePort[0]) return edge;
}
}
// Return the first output edge as the base if there are no inPlace consumers,
// which benefits zero-copy of outputs.
for (auto edge : edgesForSamePort) {
if (Type::Output == edge->getChild()->getType()) return edge;
}
return edgesForSamePort[0];
}

View File

@@ -812,8 +812,34 @@ void Graph::AllocateWithReuse() {
}
if (!undefinedBoxes.empty()) {
// Use proxy memory manager for output edges
for (auto& box : undefinedBoxes) {
for (auto& edge : edge_clusters[box.id]) {
const auto child = edge->getChild();
if (child->getType() == Type::Output &&
edge->getStatus() == Edge::Status::NeedAllocation) {
auto proxyMemMngr =
std::make_shared<ProxyMemoryMngr>();
DEBUG_LOG("ProxyMemoryMngr ", proxyMemMngr, " ", this);
edge->allocate(proxyMemMngr);
// Store the output memory managers
// so that the infer requests can access them.
int count = 0;
for (auto &output : outputNodesMap) {
if (output.second == child) {
outputNodesMemMngrMap[output.first] = proxyMemMngr;
count++;
}
}
// sometimes there are unused output ports.
IE_ASSERT(count <= 1) << "cannot find output node. count " << count;
}
}
}
if (!syncNodesInds.empty()) {
//We have to extend the lifespan of thensors that are crossing a sync point border in order to save
//We have to extend the lifespan of tensors that are crossing a sync point border in order to save
//the intermediate computation results from possible loss due to the tensor resize
std::vector<int> vecIntervals = {0};
for (const auto& item : syncNodesInds) {
@@ -990,6 +1016,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::
}
}
// assume the infer_request's intel_cpu::Tensor is always shared with the Graph when isDynamic.
void Graph::PullOutputData(BlobMap &out) {
if (!IsReady())
IE_THROW() << "Wrong state. Topology not ready.";
@@ -1006,6 +1033,8 @@ void Graph::PullOutputData(BlobMap &out) {
IE_THROW(Unexpected) << "The CPU plugin graph doesn't contain output node with name: \"" << name << "\"";
}
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()));
const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
auto &expectedDesc = ext_blob->getTensorDesc();
@@ -1029,7 +1058,12 @@ void Graph::PullOutputData(BlobMap &out) {
if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) {
expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout());
}
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()),
" dims ", PartialShape(out[name]->getTensorDesc().getDims()), " -> ", PartialShape(outDims),
", intr ptr ", intr_blob.getData(), " , parentedge's memory object ", parentEdge->getMemoryPtr().get());
out[name]->setShape(outDims);
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()),
" dims ", PartialShape(out[name]->getTensorDesc().getDims()), ", intr ptr ", intr_blob.getData());
}
// check for empty output blob
@@ -1047,6 +1081,8 @@ void Graph::PullOutputData(BlobMap &out) {
void *ext_blob_ptr = ext_blob->buffer();
void *intr_blob_ptr = intr_blob.getData();
DEBUG_LOG(name, " @ ", intr_blob_ptr, " -> ", ext_blob_ptr, " zero-copy: ", intr_blob_ptr == ext_blob_ptr, " graph ", this, "\r\n");
// That is the same memory. No need to copy
if (ext_blob_ptr == intr_blob_ptr) continue;
@@ -1313,13 +1349,12 @@ inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream)
DUMP(node, getConfig().debugCaps, infer_count);
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
DEBUG_LOG(*node);
if (node->isDynamicNode()) {
node->executeDynamic(stream);
} else {
node->execute(stream);
}
DEBUG_LOG(*node);
}
void Graph::Infer(InferRequestBase* request) {

View File

@@ -19,6 +19,8 @@
#include <memory>
#include <atomic>
#include "proxy_mem_mgr.h"
namespace ov {
namespace intel_cpu {
@@ -190,6 +192,8 @@ public:
return graphHasDynamicInput;
}
Status getStatus() const {return status;}
protected:
void VisitNode(NodePtr node, std::vector<NodePtr>& sortedNodes);
@@ -248,6 +252,8 @@ private:
std::map<std::string, NodePtr> inputNodesMap;
std::map<std::string, NodePtr> outputNodesMap;
std::unordered_map<std::string, ProxyMemoryMngrPtr> outputNodesMemMngrMap;
// these node pointers (from graphNodes) are to avoid regular checking for
// constantness of nodes in Infer methods and calls of
// non-executable (optimized out) nodes, such as Input, Reshape, etc.

View File

@@ -25,6 +25,9 @@
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include <transformations/utils/utils.hpp>
#include <ie_ngraph_utils.hpp>
#include "proxy_mem_mgr.h"
#include "openvino/runtime/make_tensor.hpp"
#include <utils/general_utils.h>
namespace ov {
namespace intel_cpu {
@@ -182,6 +185,13 @@ void InferRequestBase::InferImpl() {
ThrowIfCanceled();
// update output control blocks, if any, in order to refresh internal buffers
if (Graph::Status::ReadyDynamic == graph->getStatus()) {
for (auto&& item : outputControlBlocks) {
item.second.update();
}
}
graph->PullOutputData(_outputs);
}
@@ -202,93 +212,137 @@ static inline void changeEdgePtr(const EdgePtr &edge, InferenceEngine::Blob::Ptr
}
void InferRequestBase::changeDefaultPtr() {
const auto& inputNodesMap = graph->GetInputNodesMap();
const auto& outputNodesMap = graph->GetOutputNodesMap();
for (auto& it : externalPtr) {
const auto& inputNodesMap = graph->GetInputNodesMap();
auto input = inputNodesMap.find(it.first);
if (input != inputNodesMap.end()) {
NodePtr inputNodePtr = input->second;
if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast<void*>(it.second->buffer()))
continue;
auto& childEdges = inputNodePtr->getChildEdges();
// Perform checks that the user's memory will not be modified
bool canBeInPlace = true;
for (auto& childEdge : childEdges) {
auto ce = childEdge.lock();
if (!ce)
if (inputNodesMap.end() == input) {
OPENVINO_ASSERT(outputNodesMap.count(it.first), "Cannot find input/output blob: ", it.first);
continue;
}
NodePtr inputNodePtr = input->second;
if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast<void*>(it.second->buffer()))
continue;
auto& childEdges = inputNodePtr->getChildEdges();
// Perform checks that the user's memory will not be modified
bool canBeInPlace = true;
for (auto& childEdge : childEdges) {
auto ce = childEdge.lock();
if (!ce)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
auto& child = ce->getChild();
if (child->isConstant()) {
canBeInPlace = false;
break;
}
// the input memory should be referenced by the children, otherwise it should be written to a
// specific location
if (ce->inPlace(Edge::LOOK_DOWN)) {
canBeInPlace = false;
break;
}
if (auto result = ce->modifiedInPlace()) {
canBeInPlace = false;
break;
}
if (child->getType() == Type::Concatenation && child->isInPlace()) {
canBeInPlace = false;
break;
}
}
if (canBeInPlace) {
for (auto& edge : childEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
auto& child = ce->getChild();
if (child->isConstant()) {
canBeInPlace = false;
break;
}
// the input memory should be referenced by the children, otherwise it should be written to a
// specific location
if (ce->inPlace(Edge::LOOK_DOWN)) {
canBeInPlace = false;
break;
}
if (auto result = ce->modifiedInPlace()) {
canBeInPlace = false;
break;
}
if (child->getType() == Type::Concatenation && child->isInPlace()) {
canBeInPlace = false;
break;
}
changeEdgePtr(e, it.second);
}
if (canBeInPlace) {
for (auto& edge : childEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
}
}
changeEdgePtr(e, it.second);
}
}
for (auto& it : externalPtr) {
const auto& name = it.first;
auto output = outputNodesMap.find(name);
if (outputNodesMap.end() == output) {
continue;
}
auto parentEdge = output->second->getParentEdgeAt(0);
const auto& outputNodesMap = graph->GetOutputNodesMap();
auto output = outputNodesMap.find(it.first);
if (output != outputNodesMap.end()) {
auto parentEdge = output->second->getParentEdgeAt(0);
if (parentEdge->getMemory().getData() == static_cast<void*>(it.second->buffer()))
continue;
if (parentEdge->getMemory().getData() == static_cast<void*>(it.second->buffer()))
continue;
bool canBeInPlace = true;
void* defaultPtr = parentEdge->getMemory().getData();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = parentEdge->getParent();
NodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
canBeInPlace = false;
bool canBeInPlace = true;
void* defaultPtr = parentEdge->getMemory().getData();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = parentEdge->getParent();
NodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
canBeInPlace = false;
break;
}
auto& parentEdges = parent->getParentEdges();
for (auto& edge : parentEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
if (e->getMemory().getData() == defaultPtr) {
parent = e->getParent();
break;
}
}
} while (previousParent != parent);
if (canBeInPlace)
changeEdgePtr(parentEdge, it.second);
}
auto& parentEdges = parent->getParentEdges();
for (auto& edge : parentEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
if (Graph::Status::ReadyDynamic == graph->getStatus()) {
const auto &outMemMngrMap = graph->outputNodesMemMngrMap;
for (auto&& item : outMemMngrMap) {
const auto& name = item.first;
if (e->getMemory().getData() == defaultPtr) {
parent = e->getParent();
break;
// share the intel_cpu::Tensor with the Graph by injecting it into the corresponding ProxyMemoryMngr instance.
auto outputMemMngr = item.second;
OPENVINO_ASSERT(outputMemMngr, "proxy mem manager for output ", name, " is empty.");
auto controlBlockItr = outputControlBlocks.find(name);
if (controlBlockItr != outputControlBlocks.end()) {
auto output = outputNodesMap.find(name);
OPENVINO_ASSERT(outputNodesMap.end() != output, "Node with name: ", name, " is absent in the outputNodesMap");
auto parentEdge = output->second->getParentEdgeAt(0);
//avoid cyclic memory use
auto parentNode = parentEdge->getParent();
const auto& parentNodeInpEdges = parentNode->getParentEdges();
std::unordered_set<const void*> parentInputPtrs(parentNodeInpEdges.size());
for (auto&& edge : parentNodeInpEdges) {
if (auto edgePtr = edge.lock()) {
parentInputPtrs.insert(edgePtr->getMemoryPtr()->getData());
}
}
} while (previousParent != parent);
if (canBeInPlace)
changeEdgePtr(parentEdge, it.second);
continue;
auto&& controlBlock = controlBlockItr->second;
std::shared_ptr<IMemoryMngr> memMngr = parentInputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output
controlBlock.nextMemMngr() : // then swap internal buffer to avoid data corruption
controlBlock.currentMemMngr(); // else reuse the existing buffer
outputMemMngr->setMemMngr(memMngr);
DEBUG_LOG("reset proxy ", outputMemMngr, ", actual ", controlBlock.currentMemMngr(), " graph ", graph, " inferrequest ", this);
DEBUG_LOG(name, ", blob ", controlBlock.blob(), ", tensor ", controlBlock.tensor());
} else {
outputMemMngr->reset(); // switch to the internal memory since memory sharing is no longer possible
}
}
IE_THROW() << "Cannot find input/output blob: " << it.first;
}
}
@@ -716,6 +770,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob:
externalPtr.erase(name);
}
_outputs[name] = data;
outputControlBlocks.erase(name); // now the memory is under user's control
}
}
@@ -774,22 +829,39 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
if (_outputs.find(name) == _outputs.end()) {
auto outputNode = modelOutputsMap.find(name);
if (modelOutputsMap.find(name) != modelOutputsMap.end()) {
const auto shape = outputNode->second->get_input_partial_shape(0);
bool isDynamic = shape.is_dynamic();
const auto& model_shape = outputNode->second->get_input_partial_shape(0);
const auto& graph_shape = output->second->getInputShapeAtPort(0);
// WA, due to the transformations and constant folding, shape inference of the resulting model may
// have static shapes, while they are dynamic in the initial representation
const auto& shape = graph_shape.isDynamic() ? model_shape :
(model_shape.is_dynamic() ? graph_shape.toPartialShape() : model_shape);
const bool isDynamic = shape.is_dynamic();
if (!data) {
InferenceEngine::SizeVector dims;
if (isDynamic) {
dims = InferenceEngine::SizeVector(shape.rank().get_length(), 0);
const auto model_prec = InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0));
const auto graph_prec = output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision();
OutputControlBlock control_block{model_prec, Shape{shape}};
DEBUG_LOG(name,
", blob ", control_block.blob(),
", tensor ", control_block.tensor(),
", memmngr ", control_block.tensor()->get_memory()->getMemoryMngr(),
"memory object ", control_block.tensor()->get_memory().get());
data = control_block.blob();
if (model_prec == graph_prec) outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
} else {
dims = shape.to_shape();
InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)),
dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size()));
data = make_blob_with_precision(desc);
data->allocate();
}
InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)),
dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size()));
data = make_blob_with_precision(desc);
data->allocate();
} else {
const auto& blobDims = data->getTensorDesc().getDims();
// in the static shape case, knowing the shapes are incompatible is enough to throw an exception
@@ -831,9 +903,23 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
IE_THROW() << "Cannot find blob with name: " << name;
}
DEBUG_LOG(name, ", blob ", data, ", ", static_cast<void*>(data->buffer()));
return data;
}
void InferRequest::checkBlobs() {
for (auto const& input : _inputs) {
checkBlob(input.second, input.first, true);
}
// won't check dynamic output blobs as they are not allocated.
for (auto const& output : _outputs) {
const auto out_node = findOutputByNodeName(output.first);
const auto isDynamic = out_node && out_node->get_output_partial_shape(0).is_dynamic();
if (!isDynamic) checkBlob(output.second, output.first, false);
}
}
void InferRequest::PushInputData() {
for (auto input : _inputs) {
auto inputName = input.first;
@@ -845,5 +931,22 @@ void InferRequest::PushInputData() {
}
}
InferRequestBase::OutputControlBlock::OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape) {
dnnl::engine eng(dnnl::engine::kind::cpu, 0);
m_buffers[m_buffIndx] = std::make_shared<MemoryMngrWithReuse>();
m_proxyMemMngr = std::make_shared<ProxyMemoryMngr>(m_buffers[m_buffIndx]);
Shape memShape = shape.isDynamic() ?
Shape{VectorDims(shape.getRank(), 0)} : // this is a WA since the ITensor doesn't allow dyn shapes
Shape{shape};
CpuBlockedMemoryDescPtr desc =
std::make_shared<CpuBlockedMemoryDesc>(precision, memShape);
auto memory = std::make_shared<Memory>(eng, desc, m_proxyMemMngr);
m_tensor = std::make_shared<Tensor>(memory);
m_blob = tensor_to_blob({m_tensor, nullptr});
}
} // namespace intel_cpu
} // namespace ov

View File

@@ -9,6 +9,7 @@
#include <string>
#include <map>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "cpu_tensor.h"
namespace ov {
namespace intel_cpu {
@@ -52,12 +53,65 @@ protected:
InferenceEngine::Precision normToInputSupportedPrec(const std::pair<const std::string, InferenceEngine::Blob::Ptr>& input) const;
void pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision dataType);
protected:
class OutputControlBlock {
public:
using MemMngrPtr = std::shared_ptr<MemoryMngrWithReuse>;
public:
OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape);
OutputControlBlock(const OutputControlBlock&) = delete;
OutputControlBlock& operator=(const OutputControlBlock&) = delete;
OutputControlBlock(OutputControlBlock&&) = default;
OutputControlBlock& operator=(OutputControlBlock&&) = default;
InferenceEngine::Blob::Ptr blob() const {
return m_blob;
}
std::shared_ptr<Tensor> tensor() const {
return m_tensor;
}
const void* rawPtr() const {
return m_tensor->get_memory()->getData();
}
MemMngrPtr currentMemMngr() const {
return m_buffers[m_buffIndx];
}
MemMngrPtr nextMemMngr() {
m_buffIndx ^= 0x1;
if (!m_buffers[m_buffIndx]) {
m_buffers[m_buffIndx] = std::make_shared<MemoryMngrWithReuse>();
}
return m_buffers[m_buffIndx];
}
void update() {
m_proxyMemMngr->setMemMngr(currentMemMngr());
}
private:
std::shared_ptr<Tensor> m_tensor = nullptr;
InferenceEngine::Blob::Ptr m_blob = nullptr;
ProxyMemoryMngrPtr m_proxyMemMngr = nullptr;
std::array<MemMngrPtr, 2> m_buffers;
int m_buffIndx = 0;
};
protected:
virtual void initBlobs() = 0;
virtual void PushInputData() = 0;
Graph* graph = nullptr;
std::unordered_map<std::string, InferenceEngine::Blob::Ptr> externalPtr;
std::unordered_map<std::string, OutputControlBlock> outputControlBlocks;
private:
void PushStates();
void PullStates();
@@ -97,6 +151,8 @@ public:
void SetBlobsImpl(const std::string& name, const InferenceEngine::BatchedBlob::Ptr& batched_blob) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void checkBlobs() override;
private:
void PushInputData() override;
void initBlobs() override;
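
To make the double-buffer mechanics above concrete: nextMemMngr() ping-pongs between two lazily allocated buffers, and changeDefaultPtr() (in infer_request.cpp earlier in this commit) swaps only when the producing node's inputs alias the current output pointer, i.e. the iteration-chaining case. A self-contained toy with stand-in types (Buf, ControlBlock, and pick are illustrative, not the real API):

#include <array>
#include <cassert>
#include <memory>
#include <unordered_set>
#include <vector>

struct Buf { std::vector<char> data = std::vector<char>(64); };
using BufPtr = std::shared_ptr<Buf>;

struct ControlBlock {                       // models OutputControlBlock's buffers
    std::array<BufPtr, 2> buffers{std::make_shared<Buf>(), nullptr};
    int idx = 0;
    BufPtr current() const { return buffers[idx]; }
    BufPtr next() {                         // like nextMemMngr(): flip, allocate lazily
        idx ^= 0x1;
        if (!buffers[idx]) buffers[idx] = std::make_shared<Buf>();
        return buffers[idx];
    }
    const void* rawPtr() const { return buffers[idx]->data.data(); }
};

// Mirrors the selection logic in changeDefaultPtr(): swap the internal buffer
// only when the output memory is also fed back as an input of the producer.
BufPtr pick(ControlBlock& cb, const std::unordered_set<const void*>& parentInputPtrs) {
    return parentInputPtrs.count(cb.rawPtr()) ? cb.next() : cb.current();
}

int main() {
    ControlBlock cb;
    std::unordered_set<const void*> inputs;
    assert(pick(cb, inputs) == cb.current());   // no aliasing: reuse the buffer
    inputs.insert(cb.rawPtr());                 // output chained back as next input
    BufPtr fresh = pick(cb, inputs);            // aliasing detected: swapped buffer
    assert(fresh->data.data() != *inputs.begin());
}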

View File

@@ -338,9 +338,6 @@ void Reorder::execute(dnnl::stream strm) {
} else if (canUseNcsp2Nspc) {
optimizedNcsp2Nspc();
} else {
// src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData());
// dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData());
if (prim) {
prim.execute(strm, primArgs);
} else {

View File

@@ -0,0 +1,74 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "proxy_mem_mgr.h"
#include "utils/debug_capabilities.h"
using namespace ov::intel_cpu;
void ProxyMemoryMngr::setMemMngr(std::shared_ptr<IMemoryMngr> pMngr) {
OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object");
if (m_pMngr == pMngr) {
return;
}
m_pMngr = pMngr;
m_pMngr->resize(m_size);
notifyUpdate();
}
void ProxyMemoryMngr::reset() {
if (!m_pOrigMngr) {
m_pOrigMngr = std::make_shared<MemoryMngrWithReuse>();
}
if (m_pMngr == m_pOrigMngr) {
return;
}
m_pMngr = m_pOrigMngr;
m_pMngr->resize(m_size);
notifyUpdate();
}
void* ProxyMemoryMngr::getRawPtr() const noexcept {
return m_pMngr->getRawPtr();
}
void ProxyMemoryMngr::setExtBuff(void* ptr, size_t size) {
m_pMngr->setExtBuff(ptr, size);
notifyUpdate();
}
bool ProxyMemoryMngr::resize(size_t size) {
auto res = m_pMngr->resize(size);
DEBUG_LOG(this, ", ", m_pMngr, " size ", m_size, " -> ", size, " resized? ", res, " RawPtr ", getRawPtr());
m_size = size;
notifyUpdate();
return res;
}
bool ProxyMemoryMngr::hasExtBuffer() const noexcept {
return m_pMngr->hasExtBuffer();
}
void ProxyMemoryMngr::registerMemory(Memory* memPtr) {
if (memPtr) {
m_setMemPtrs.insert(memPtr);
}
}
void ProxyMemoryMngr::unregisterMemory(Memory* memPtr) {
if (memPtr) {
m_setMemPtrs.erase(memPtr);
}
}
void ProxyMemoryMngr::notifyUpdate() {
for (auto& item : m_setMemPtrs) {
if (item) {
item->update();
}
}
}

View File

@@ -0,0 +1,52 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cpu_memory.h"
namespace ov {
namespace intel_cpu {
/**
* @brief A proxy object that additionally implements observer pattern
*/
class ProxyMemoryMngr : public IMemoryMngrObserver {
public:
ProxyMemoryMngr() : m_pOrigMngr(std::make_shared<MemoryMngrWithReuse>()), m_pMngr(m_pOrigMngr) {}
explicit ProxyMemoryMngr(std::shared_ptr<IMemoryMngr> pMngr) {
OPENVINO_ASSERT(pMngr, "Memory manager is uninitialized");
m_pMngr = pMngr;
}
void* getRawPtr() const noexcept override;
void setExtBuff(void* ptr, size_t size) override;
bool resize(size_t size) override;
bool hasExtBuffer() const noexcept override;
void registerMemory(Memory* memPtr) override;
void unregisterMemory(Memory* memPtr) override;
void setMemMngr(std::shared_ptr<IMemoryMngr> pMngr);
void reset();
private:
void notifyUpdate();
// We keep the original MemMngr since we may fall back to copying the output.
std::shared_ptr<IMemoryMngr> m_pOrigMngr = nullptr;
std::shared_ptr<IMemoryMngr> m_pMngr = nullptr;
std::unordered_set<Memory*> m_setMemPtrs;
// WA: the resize stage might not run because there is no shape change,
// while the underlying memory manager has changed.
size_t m_size = 0ul;
};
using ProxyMemoryMngrPtr = std::shared_ptr<ProxyMemoryMngr>;
using ProxyMemoryMngrCPtr = std::shared_ptr<const ProxyMemoryMngr>;
} // namespace intel_cpu
} // namespace ov
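
The observer relationship mentioned in the @brief works as follows: Memory objects register with the proxy, and any manager change (setMemMngr, reset, setExtBuff, resize) calls notifyUpdate(), which asks every registered Memory to re-read its data pointer. A stripped-down sketch of that pattern, using stand-in types rather than the plugin's real classes:

#include <iostream>
#include <memory>
#include <unordered_set>
#include <vector>

struct Mngr {                            // stand-in for a concrete IMemoryMngr
    std::vector<char> buf = std::vector<char>(16);
    void* raw() { return buf.data(); }
};

struct Mem;

struct Proxy {                           // stand-in for ProxyMemoryMngr
    std::shared_ptr<Mngr> target = std::make_shared<Mngr>();
    std::unordered_set<Mem*> observers;  // like m_setMemPtrs
    void registerMemory(Mem* m) { observers.insert(m); }
    void setMngr(std::shared_ptr<Mngr> t);
};

struct Mem {                             // stand-in for Memory
    Proxy& proxy;
    void* cached = nullptr;
    explicit Mem(Proxy& p) : proxy(p) { p.registerMemory(this); update(); }
    void update() { cached = proxy.target->raw(); }   // like Memory::update()
};

void Proxy::setMngr(std::shared_ptr<Mngr> t) {
    target = std::move(t);
    for (auto* m : observers) m->update();            // notifyUpdate()
}

int main() {
    Proxy proxy;
    Mem mem(proxy);
    auto original = proxy.target;          // keep the old manager alive for comparison
    void* before = mem.cached;
    proxy.setMngr(std::make_shared<Mngr>());          // swap the underlying manager
    std::cout << "pointer refreshed: " << (before != mem.cached) << "\n";
}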

View File

@@ -29,7 +29,7 @@ bool BrgemmBlocking::run(snippets::lowered::LinearIR& linear_ir) {
const auto& loop_manager = linear_ir.get_loop_manager();
const auto dim_idx = 1;
const size_t dim_idx = 1;
auto blocking_loop_exists = [&](const ov::snippets::lowered::ExpressionPtr& expr,
const std::shared_ptr<ov::intel_cpu::BrgemmCPU>& brgemm) {

View File

@@ -254,7 +254,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) {
} else {
// no SPD yet, use original shapes
comma = "";
for (int i = 0; i < node.getOriginalOutputPrecisions().size(); i++) {
for (size_t i = 0; i < node.getOriginalOutputPrecisions().size(); i++) {
auto shape = node.getOutputShapeAtPort(i);
std::string prec_name = "Undef";
prec_name = node.getOriginalOutputPrecisionAtPort(i).name();
@@ -282,6 +282,10 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) {
auto n = edge->getParent();
os << comma;
os << node_id(*edge->getParent());
auto ptr = edge->getMemoryPtr();
if (ptr) {
os << "_" << ptr->getData();
}
if (!is_single_output_port(*n))
os << "[" << edge->getInputNum() << "]";
comma = ",";

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <limits.h>
#include "behavior/ov_infer_request/iteration_chaining.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace ov::test::behavior;
namespace {
const std::vector<ov::AnyMap> configs = {
{}
};
const std::vector<ov::AnyMap> HeteroConfigs = {
{ov::device::priorities(CommonTestUtils::DEVICE_CPU)}
};
const std::vector<ov::AnyMap> AutoConfigs = {
{ov::device::priorities(CommonTestUtils::DEVICE_CPU)}
};
INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(configs)),
OVIterationChaining::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Hetero_BehaviorTests, OVIterationChaining,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_HETERO),
::testing::ValuesIn(HeteroConfigs)),
OVIterationChaining::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, OVIterationChaining,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_AUTO),
::testing::ValuesIn(AutoConfigs)),
OVIterationChaining::getTestCaseName);
} // namespace

View File

@@ -90,7 +90,8 @@ protected:
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
auto customOp = std::make_shared<CustomOp>(paramOuts);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(customOp)};
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(customOp->output(0)),
std::make_shared<ngraph::opset3::Result>(customOp->output(1))};
function = std::make_shared<ngraph::Function>(results, inputParams, "customOpTest");
}

View File

@@ -0,0 +1,258 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gmock/gmock-spec-builders.h>
#include <gmock/gmock.h>
#include <gtest/gtest-param-test.h>
#include <gtest/gtest.h>
#include <openvino/core/shape.hpp>
#include <openvino/core/strides.hpp>
#include <openvino/core/type/element_type.hpp>
#include "openvino/core/except.hpp"
#include "openvino/core/partial_shape.hpp"
#include "cpu_memory.h"
#include "cpu_tensor.h"
#include "openvino/runtime/itensor.hpp"
#include "ie_ngraph_utils.hpp"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using CPUTensorTest = ::testing::Test;
class MockBlockedMemoryDesc : public BlockedMemoryDesc {
public:
MockBlockedMemoryDesc(const Shape& _shape) : MemoryDesc(_shape, Blocked) {}
MOCK_METHOD(InferenceEngine::Precision, getPrecision, (), (const, override));
MOCK_METHOD(MemoryDescPtr, clone, (), (const, override));
MOCK_METHOD(size_t, getOffsetPadding, (), (const, override));
MOCK_METHOD(MemoryDescPtr, cloneWithNewDimsImp, (const VectorDims&), (const, override));
MOCK_METHOD(MemoryDescPtr, cloneWithNewPrecision, (const InferenceEngine::Precision), (const, override));
MOCK_METHOD(bool, isCompatible, (const MemoryDesc&), (const, override));
MOCK_METHOD(bool, hasLayoutType, (LayoutType), (const, override));
MOCK_METHOD(size_t, getMaxMemSize, (), (const, override));
MOCK_METHOD(const VectorDims&, getBlockDims, (), (const, override));
MOCK_METHOD(const VectorDims&, getOrder, (), (const, override));
MOCK_METHOD(const VectorDims&, getOffsetPaddingToData, (), (const, override));
MOCK_METHOD(const VectorDims&, getStrides, (), (const, override));
MOCK_METHOD(bool, blocksExtended, (), (const, override));
MOCK_METHOD(size_t, getPaddedElementsCount, (), (const, override));
MOCK_METHOD(bool, isCompatible, (const BlockedMemoryDesc &, CmpMask), (const, override));
MOCK_METHOD(void, setPrecision, (InferenceEngine::Precision), (override));
MOCK_METHOD(size_t, getCurrentMemSizeImp, (), (const, override));
MOCK_METHOD(size_t, getElementOffset, (size_t), (const, override));
MOCK_METHOD(bool, canComputeMemSizeZeroDims, (), (const, override));
MOCK_METHOD(bool, isDefinedImp, (), (const, override));
};
class MockIMemory : public IMemory {
public:
MockIMemory(MemoryDescPtr desc) : m_pMemDesc(desc) {}
MockIMemory(const MemoryDesc& desc) : m_pMemDesc(desc.clone()) {}
MOCK_METHOD(bool, isAllocated, (), (const, noexcept, override));
MOCK_METHOD(MemoryDesc&, getDesc, (), (const, override));
MOCK_METHOD(MemoryDescPtr, getDescPtr, (), (const, override));
MOCK_METHOD(size_t, getSize, (), (const, override));
MOCK_METHOD(const Shape&, getShape, (), (const, override));
MOCK_METHOD(const VectorDims&, getStaticDims, (), (const, override));
MOCK_METHOD(void, redefineDesc, (MemoryDescPtr), (override));
MOCK_METHOD(void, load, (const IMemory&, bool), (const, override));
MOCK_METHOD(MemoryMngrPtr, getMemoryMngr, (), (const, override));
MOCK_METHOD(dnnl::memory, getPrimitive, (), (const, override));
MOCK_METHOD(void, nullify, (), (override));
MOCK_METHOD(void*, getData, (), (const, override));
void set_memDesc(MemoryDescPtr memdesc) { m_pMemDesc = memdesc; }
void set_memDesc(const MemoryDesc& memdesc) { m_pMemDesc = memdesc.clone(); }
MemoryDesc& get_memDesc() const { return *m_pMemDesc; }
MemoryDescPtr get_memDescPtr() { return m_pMemDesc; }
private:
MemoryDescPtr m_pMemDesc;
};
// helper to get byte strides from strides.
static ov::Strides byte_strides(const ov::Strides& strides, const ov::element::Type& type) {
ov::Strides byte_strides(strides.size());
for (size_t i = 0; i < strides.size(); ++i)
byte_strides[i] = strides[i] * type.size();
return byte_strides;
}
// helpers to create a mock memory descriptor and Memory of ncsp layout.
inline MemoryDescPtr create_memdesc(Precision prec, const Shape& shape, const VectorDims& strides = {}) {
ov::Shape ov_shape = shape.toPartialShape().to_shape();
const std::size_t totalSize = ov::shape_size(ov_shape);
auto elem_type = InferenceEngine::details::convertPrecision(prec);
auto memdesc = std::make_shared<MockBlockedMemoryDesc>(shape);
::testing::Mock::AllowLeak(memdesc.get());
EXPECT_CALL(*memdesc, hasLayoutType(::testing::Eq(LayoutType::ncsp))).WillRepeatedly(::testing::Return(true));
EXPECT_CALL(*memdesc, getPrecision).WillRepeatedly(::testing::Return(prec));
EXPECT_CALL(*memdesc, getStrides).WillRepeatedly(::testing::ReturnRef(strides));
EXPECT_CALL(*memdesc, canComputeMemSizeZeroDims).WillRepeatedly(::testing::Return(true));
EXPECT_CALL(*memdesc, isDefinedImp).WillRepeatedly(::testing::Return(true));
EXPECT_CALL(*memdesc, getCurrentMemSizeImp).WillRepeatedly(::testing::Return(totalSize * elem_type.size()));
return memdesc;
}
inline MemoryPtr create_memory(MemoryDescPtr memdesc) {
auto memptr = std::make_shared<MockIMemory>(memdesc);
::testing::Mock::AllowLeak(memptr.get());
// getDesc
EXPECT_CALL(*memptr, getDescPtr)
.Times(::testing::AnyNumber())
.WillRepeatedly([memptr]() {
return memptr->get_memDescPtr();
});
EXPECT_CALL(*memptr, getDesc).WillRepeatedly(::testing::ReturnRef(memptr->get_memDesc()));
// data
static size_t memSize = 0;
EXPECT_CALL(*memptr, getData)
.WillRepeatedly([memptr]() {
auto memdesc = memptr->get_memDescPtr();
auto required = memdesc->getCurrentMemSize();
if (memSize >= required) {
return reinterpret_cast<void*>(memSize);
} else {
memSize = required;
return reinterpret_cast<void*>(required);
}
});
// redefineDesc
ON_CALL(*memptr, redefineDesc).WillByDefault([memptr](MemoryDescPtr desc) {
memptr->set_memDesc(desc);
});
EXPECT_CALL(*memptr, redefineDesc).Times(::testing::AtLeast(1));
return memptr;
}
TEST_F(CPUTensorTest, canCreateTensor) {
Shape shape{4, 3, 2};
ov::Shape ov_shape = shape.toPartialShape().to_shape();
auto strides = ov::Strides({6, 2, 1});
const std::size_t totalSize = ov::shape_size(ov_shape);
ov::element::Type elem_type = ov::element::f32;
auto memptr = create_memory(create_memdesc(Precision::FP32, shape, strides));
{
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
ASSERT_EQ(totalSize, t->get_size());
ASSERT_NE(nullptr, t->data());
ASSERT_EQ(elem_type, t->get_element_type());
ASSERT_EQ(ov_shape, t->get_shape());
ASSERT_NE(ov_shape, t->get_strides());
ASSERT_EQ(byte_strides(ov::Strides({6, 2, 1}), t->get_element_type()), t->get_strides());
ASSERT_EQ(elem_type.size() * totalSize, t->get_byte_size());
ASSERT_THROW(t->data(ov::element::i64), ov::Exception);
ASSERT_THROW(t->data<std::int32_t>(), ov::Exception);
}
}
TEST_F(CPUTensorTest, canAccessF16Tensor) {
Shape shape = {4, 3, 2};
auto strides = ov::Strides({6, 2, 1});
auto memptr = create_memory(create_memdesc(Precision::FP16, shape, strides));
{
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
EXPECT_NE(nullptr, t->data());
ASSERT_EQ(ov::element::f16, t->get_element_type());
EXPECT_NO_THROW(t->data(ov::element::f16));
EXPECT_NO_THROW(t->data<ov::float16>());
EXPECT_THROW(t->data<ov::bfloat16>(), ov::Exception);
EXPECT_THROW(t->data<std::uint16_t>(), ov::Exception);
EXPECT_THROW(t->data<std::int16_t>(), ov::Exception);
}
}
// SetShape
TEST_F(CPUTensorTest, canSetShape) {
const Shape origShape = {1, 2, 3};
const ov::Shape ov_origShape = origShape.toPartialShape().to_shape();
auto strides = ov::Strides({6, 3, 1});
auto memdesc = create_memdesc(Precision::FP32, origShape, strides);
auto memptr = create_memory(memdesc);
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
const Shape newShape({4, 5, 6});
const ov::Shape ov_newShape = newShape.toPartialShape().to_shape();
auto new_strides = ov::Strides{30, 6, 1};
auto new_memdesc = create_memdesc(Precision::FP32, newShape, new_strides);
// set_shape to a bigger memory
{
auto blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(memdesc.get());
EXPECT_CALL(*blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(new_memdesc));
const void* orig_data = t->data();
ASSERT_EQ(t->get_shape(), ov_origShape);
ASSERT_NO_THROW(t->set_shape(ov_newShape));
ASSERT_EQ(ov_newShape, t->get_shape());
ASSERT_EQ(byte_strides(ov::row_major_strides(ov_newShape), t->get_element_type()), t->get_strides());
ASSERT_NE(orig_data, t->data());
}
// set_shape for smaller memory - does not perform reallocation
{
auto new_blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(new_memdesc.get());
EXPECT_CALL(*new_blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(memdesc));
const void* orig_data = t->data();
t->set_shape(ov_origShape);
ASSERT_EQ(ov_origShape, t->get_shape());
ASSERT_EQ(orig_data, t->data());
}
}
TEST_F(CPUTensorTest, canSyncMemoryAndTensor) {
const Shape origShape = {1, 2, 3};
const ov::Shape ov_origShape = origShape.toPartialShape().to_shape();
auto strides = ov::Strides({6, 3, 1});
auto memdesc = create_memdesc(Precision::FP32, origShape, strides);
auto memptr = create_memory(memdesc);
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
ASSERT_EQ(byte_strides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
const Shape newShape({4, 5, 6});
const ov::Shape ov_newShape = newShape.toPartialShape().to_shape();
auto new_strides = ov::Strides{30, 6, 1};
auto new_memdesc = create_memdesc(Precision::FP32, newShape, new_strides);
// reallocate memory outside the tensor instance
{
auto blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(memdesc.get());
EXPECT_CALL(*blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(new_memdesc));
auto desc2 = memptr->getDescPtr()->cloneWithNewDims(newShape.getStaticDims(), true);
memptr->redefineDesc(desc2);
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
ASSERT_EQ(byte_strides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
}
}

View File

@@ -0,0 +1,156 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gmock/gmock-spec-builders.h>
#include <gmock/gmock.h>
#include <gtest/gtest-param-test.h>
#include <gtest/gtest.h>
#include <openvino/core/shape.hpp>
#include <openvino/core/strides.hpp>
#include <openvino/core/type/element_type.hpp>
#include "openvino/core/except.hpp"
#include "openvino/core/partial_shape.hpp"
#include "cpu_memory.h"
#include "cpu_tensor.h"
#include "openvino/runtime/itensor.hpp"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
using CPUTensorExtTest = ::testing::Test;
static ov::Strides byteStrides(const ov::Strides& strides, const ov::element::Type& type) {
ov::Strides byte_strides(strides.size());
for (size_t i = 0; i < strides.size(); ++i)
byte_strides[i] = strides[i] * type.size();
return byte_strides;
}
inline MemoryPtr create_memory(Precision prc, const Shape& shape) {
dnnl::engine eng(dnnl::engine::kind::cpu, 0);
CpuBlockedMemoryDescPtr desc;
desc = std::make_shared<CpuBlockedMemoryDesc>(prc, shape);
return std::make_shared<Memory>(eng, desc);
}
TEST_F(CPUTensorExtTest, canCreateTensor) {
Shape shape{4, 3, 2};
ov::Shape ov_shape = shape.toPartialShape().to_shape();
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape));
const std::size_t totalSize = ov::shape_size(ov_shape);
ASSERT_EQ(totalSize, t->get_size());
ASSERT_NE(nullptr, t->data());
ASSERT_EQ(ov::element::f32, t->get_element_type());
ASSERT_EQ(ov_shape, t->get_shape());
ASSERT_NE(ov_shape, t->get_strides());
ASSERT_EQ(byteStrides(ov::Strides({6, 2, 1}), t->get_element_type()), t->get_strides());
ASSERT_EQ(ov::element::f32.size() * totalSize, t->get_byte_size());
ASSERT_THROW(t->data(ov::element::i64), ov::Exception);
ASSERT_THROW(t->data<std::int32_t>(), ov::Exception);
}
TEST_F(CPUTensorExtTest, canAccessF16Tensor) {
Shape shape = {4, 3, 2};
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP16, shape));
EXPECT_NE(nullptr, t->data());
ASSERT_EQ(ov::element::f16, t->get_element_type());
EXPECT_NO_THROW(t->data(ov::element::f16));
EXPECT_NO_THROW(t->data<ov::float16>());
EXPECT_THROW(t->data<ov::bfloat16>(), ov::Exception);
EXPECT_THROW(t->data<std::uint16_t>(), ov::Exception);
EXPECT_THROW(t->data<std::int16_t>(), ov::Exception);
}
// SetShape
TEST_F(CPUTensorExtTest, canSetShape) {
const ov::Shape origShape({1, 2, 3});
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, {1, 2, 3}));
const ov::Shape newShape({4, 5, 6});
const void* orig_data = t->data();
ASSERT_EQ(t->get_shape(), origShape);
ASSERT_NO_THROW(t->set_shape({4, 5, 6}));
ASSERT_EQ(newShape, t->get_shape());
ASSERT_EQ(byteStrides(ov::row_major_strides(newShape), t->get_element_type()), t->get_strides());
ASSERT_NE(orig_data, t->data());
// set_shape for smaller memory - does not perform reallocation
{
orig_data = t->data();
t->set_shape(origShape);
ASSERT_EQ(origShape, t->get_shape());
ASSERT_EQ(orig_data, t->data());
}
}
TEST_F(CPUTensorExtTest, emptySize) {
ov::PartialShape pshape{0, 3, 2};
Shape shape{pshape};
const ov::Shape origShape({0, 3, 2});
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape));
ASSERT_EQ(ov::element::f32, t->get_element_type());
ASSERT_EQ(0, t->get_size());
ASSERT_EQ(0, t->get_byte_size());
ASSERT_EQ(origShape, t->get_shape());
ASSERT_EQ(byteStrides(ov::Strides({0, 0, 0}), t->get_element_type()), t->get_strides());
EXPECT_NO_THROW(t->data());
}
TEST_F(CPUTensorExtTest, canCreateTensorWithDynamicShape) {
ov::PartialShape pshape{-1, 3, 2};
Shape shape{pshape};
std::shared_ptr<ov::ITensor> t;
// construct with memory with dynamic shape
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape)));
ASSERT_THROW(t->get_shape(), ov::Exception);
ASSERT_THROW(t->get_strides(), ov::Exception);
// change memory to dynamic shape
{
auto memptr = create_memory(Precision::FP32, {4, 3, 2});
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(memptr));
ov::PartialShape pshape{{1, 10}, 3, 2};
CpuBlockedMemoryDescPtr desc2 = std::make_shared<CpuBlockedMemoryDesc>(Precision::FP32, Shape(pshape));
memptr->redefineDesc(desc2);
ASSERT_THROW(t->get_shape(), ov::Exception);
ASSERT_THROW(t->get_strides(), ov::Exception);
}
// set_shape
const ov::Shape newShape({4, 0, 2});
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, {4, 3, 2})));
const void* orig_data = t->data();
ASSERT_NO_THROW(t->set_shape({4, 0, 2}));
ASSERT_EQ(newShape, t->get_shape());
ASSERT_EQ(ov::Strides({0, 0, 0}), t->get_strides());
ASSERT_EQ(orig_data, t->data());
}
TEST_F(CPUTensorExtTest, canSyncMemoryAndTensor) {
Shape orig_shape{4, 3, 2};
auto memptr = create_memory(Precision::FP32, orig_shape);
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
ASSERT_EQ(byteStrides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
// reallocate memory outside the tensor instance
{
Shape new_shape{1, 5, 2};
auto desc2 = memptr->getDescPtr()->cloneWithNewDims(new_shape.getStaticDims(), true);
memptr->redefineDesc(desc2);
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
ASSERT_EQ(byteStrides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
}
}

View File

@@ -379,6 +379,10 @@ def compare_dump_file(ieb_file1, ieb_file2, visualize):
else:
diff_abs = np.abs(ieb1.value - ieb2.value)
if not np.all(diff_abs.shape):
print(" Shape{} has dim 0".format(ieb1.shape))
return
max_abs = np.amax(diff_abs)
max_idx = np.where(diff_abs >= max_abs)
max_org = np.abs(ieb2.value)[max_idx]

View File

@@ -0,0 +1,47 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <chrono>
#include <initializer_list>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "base/behavior_test_utils.hpp"
#include "openvino/core/attribute_visitor.hpp"
#include "openvino/core/model.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/partial_shape.hpp"
#include "openvino/core/rank.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/infer_request.hpp"
#include "openvino/runtime/tensor.hpp"
namespace ov {
namespace test {
namespace behavior {
struct OVIterationChaining : public OVInferRequestTests {
static std::string getTestCaseName(const testing::TestParamInfo<InferRequestParams>& obj);
void Run();
void SetUp() override;
void TearDown() override;
ov::InferRequest req;
private:
static std::shared_ptr<ov::Model> getIterativeFunction();
bool checkOutput(const ov::runtime::Tensor& in, const ov::runtime::Tensor& actual);
};
} // namespace behavior
} // namespace test
} // namespace ov

View File

@@ -188,6 +188,36 @@ TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkSetOutputShapeBeforeInfer)
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
}
TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkGetOutputThenSetOutputTensorPreAllocatedMemoryBeforeInfer) {
const std::string tensor_name = "input_tensor";
const ov::Shape refShape = inOutShapes[0].first;
const ov::Shape refOutShape = inOutShapes[0].second;
std::map<std::string, ov::PartialShape> shapes;
shapes[tensor_name] = {ov::Dimension::dynamic(), 4, 20, 20};
OV_ASSERT_NO_THROW(function->reshape(shapes));
// Load ov::Model to target plugins
auto execNet = ie->compile_model(function, target_device, configuration);
// Create InferRequest
ov::InferRequest req;
ov::runtime::Tensor tensor;
const std::string outputname = function->outputs().back().get_any_name();
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
tensor = ov::test::utils::create_and_fill_tensor(element::f32, refShape, 100, -50);
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor", tensor));
// first, get output tensor
OV_ASSERT_NO_THROW(req.infer());
ASSERT_EQ(req.get_tensor(outputname).get_shape(), refOutShape);
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
// then, set output tensor
float ptr[5000];
ov::runtime::Tensor otensor(element::f32, refOutShape, ptr);
OV_ASSERT_NO_THROW(req.set_tensor(outputname, otensor));
OV_ASSERT_NO_THROW(req.infer());
ASSERT_EQ(req.get_tensor(outputname).data<float>(), ptr);
ASSERT_EQ(req.get_tensor(outputname).get_shape(), refOutShape);
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
}
TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkWithoutSetShape) {
const std::string tensor_name = "input_tensor";
std::map<std::string, ov::PartialShape> shapes;

View File

@@ -0,0 +1,121 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <chrono>
#include <gtest/gtest.h>
#include <initializer_list>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "base/ov_behavior_test_utils.hpp"
#include "openvino/core/attribute_visitor.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/partial_shape.hpp"
#include "openvino/core/rank.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/core/model.hpp"
#include "ngraph_functions/builders.hpp"
#include "openvino/runtime/infer_request.hpp"
#include "openvino/runtime/tensor.hpp"
#include "behavior/ov_infer_request/iteration_chaining.hpp"
namespace ov {
namespace test {
namespace behavior {
std::string OVIterationChaining::getTestCaseName(const testing::TestParamInfo<InferRequestParams>& obj) {
return OVInferRequestTests::getTestCaseName(obj);
}
std::shared_ptr<ov::Model> OVIterationChaining::getIterativeFunction() {
const ov::PartialShape pshape{-1, 16};
auto params = ngraph::builder::makeDynamicParams(element::Type_t::f32, {pshape});
params[0]->get_output_tensor(0).set_names({"input_tensor_0"});
params[0]->set_friendly_name("param_0");
auto concat_const = ngraph::builder::makeConstant(element::Type_t::f32, {1, 16}, std::vector<float>{}, true);
auto concat = ngraph::builder::makeConcat({params[0], concat_const}, 0 /*axis*/);
auto eltwise_const = ngraph::builder::makeConstant(element::Type_t::f32, {1, 16}, std::vector<float>{}, true);
auto eltwise = ngraph::builder::makeEltwise(concat, eltwise_const, ngraph::helpers::EltwiseTypes::ADD);
concat->get_output_tensor(0).set_names({"result_tensor_0"});
concat->set_friendly_name("result_0");
eltwise->get_output_tensor(0).set_names({"result_tensor_1"});
eltwise->set_friendly_name("result_1");
return std::make_shared<ov::Model>(ov::NodeVector{concat, eltwise}, ov::ParameterVector(params));
}
void OVIterationChaining::SetUp() {
std::tie(target_device, configuration) = this->GetParam();
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
APIBaseTest::SetUp();
function = getIterativeFunction();
ov::AnyMap params;
for (auto&& v : configuration) {
params.emplace(v.first, v.second);
}
execNet = core->compile_model(function, target_device, params);
try {
req = execNet.create_infer_request();
} catch (const std::exception& ex) {
FAIL() << "Can't Create Infer Requiest in SetUp \nException [" << ex.what() << "]"
<< std::endl;
}
}
void OVIterationChaining::TearDown() {
req = {};
OVInferRequestTests::TearDown();
}
bool OVIterationChaining::checkOutput(const ov::runtime::Tensor& in, const ov::runtime::Tensor& actual) {
bool result = true;
auto net = core->compile_model(function, CommonTestUtils::DEVICE_TEMPLATE);
ov::InferRequest req;
req = net.create_infer_request();
auto tensor = req.get_tensor(function->inputs().back().get_any_name());
tensor.set_shape(in.get_shape());
for (int i = 0; i < in.get_size(); i++) {
tensor.data<float>()[i] = in.data<float>()[i];
}
req.infer();
for (int i = 0; i < actual.get_size(); i++) {
if (fabs(req.get_output_tensor(0).data<float>()[i] - actual.data<float>()[i]) > std::numeric_limits<float>::epsilon())
return false;
}
return result;
}
void OVIterationChaining::Run() {
// perform iteration chaining by iteratively
// setting the input tensor to be the output tensor of the last inference, and
// beginning with an empty tensor
ov::Tensor t0(element::Type_t::f32, {0, 16});
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor_0", t0));
for (size_t i = 0; i < 10; i++) {
OV_ASSERT_NO_THROW(req.infer());
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor_0"), req.get_tensor("result_tensor_0")));
const auto t1 = req.get_tensor("result_tensor_0");
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor_0", t1));
}
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor_0"), req.get_tensor("result_tensor_0")));
}
TEST_P(OVIterationChaining, Simple) {
// Skip test according to plugin specific disabledTestPatterns() (if any)
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
}
} // namespace behavior
} // namespace test
} // namespace ov