[CPU] Zero-copy optimizations for model outputs (#18476)
- Implement zero-copy output between the plugin graph and the infer request, eliminating memory-copy overhead and improving performance - Implement double buffering for InferRequest outputs
This commit is contained in:
@@ -105,7 +105,7 @@ void CommonOptimizations::SplitDimensionM(const std::shared_ptr<ov::snippets::op
|
||||
const auto needed_new_dim = m_dim / batch_dim_multiplier; // m / (LCM(b, nthrs) / b) - needed factors of dimension m
|
||||
|
||||
auto is_optimized = [&](size_t batch_m_dim, size_t new_m_dim) {
|
||||
return batch_m_dim != 1 && new_m_dim >= optimal_m_dim;
|
||||
return batch_m_dim != 1 && new_m_dim >= static_cast<size_t>(optimal_m_dim);
|
||||
};
|
||||
|
||||
if (batch_dim_multiplier * needed_new_dim == m_dim) {
|
||||
|
||||
@@ -71,9 +71,9 @@ ov::SoPtr<ITensor> make_tensor(const std::shared_ptr<InferenceEngine::Blob>& ten
|
||||
const InferenceEngine::Blob* get_hardware_blob(const InferenceEngine::Blob* blob);
|
||||
InferenceEngine::Blob* get_hardware_blob(InferenceEngine::Blob* blob);
|
||||
|
||||
std::shared_ptr<InferenceEngine::Blob> tensor_to_blob(const ov::SoPtr<ITensor>& tensor,
|
||||
bool unwrap = true,
|
||||
InferenceEngine::TensorDesc desc = {});
|
||||
OPENVINO_RUNTIME_API std::shared_ptr<InferenceEngine::Blob> tensor_to_blob(const ov::SoPtr<ITensor>& tensor,
|
||||
bool unwrap = true,
|
||||
InferenceEngine::TensorDesc desc = {});
|
||||
/** @endcond */
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
@@ -260,8 +260,9 @@ void ov::ISyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
|
||||
" expecting ",
|
||||
port.get_shape(),
|
||||
".");
|
||||
OPENVINO_ASSERT(std::dynamic_pointer_cast<ov::IRemoteTensor>(tensor._ptr) || tensor->data() != nullptr,
|
||||
"Tensor data equal nullptr!");
|
||||
OPENVINO_ASSERT(
|
||||
std::dynamic_pointer_cast<ov::IRemoteTensor>(tensor._ptr) || tensor->data() != nullptr || is_dynamic,
|
||||
"Tensor data equal nullptr!");
|
||||
}
|
||||
|
||||
void ov::ISyncInferRequest::allocate_tensor(
|
||||
|
||||
@@ -298,7 +298,9 @@ BlockingDesc::BlockingDesc(const SizeVector& blocked_dims,
|
||||
this->offsetPaddingToData = dimOffsets;
|
||||
|
||||
// check that strides are valid
|
||||
{
|
||||
if (!std::any_of(blocked_dims.begin(), blocked_dims.end(), [](const size_t dim) {
|
||||
return dim == 0ul;
|
||||
})) {
|
||||
size_t denseStride = 1;
|
||||
|
||||
for (size_t i = 1; i <= strides.size(); i++) {
|
||||
|
||||
@@ -32,6 +32,7 @@ namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
class Memory;
|
||||
class ProxyMemoryMngr;
|
||||
|
||||
/**
|
||||
* @interface IMemoryMngr
|
||||
@@ -313,6 +314,7 @@ public:
|
||||
|
||||
private:
|
||||
friend DnnlMemoryMngr;
|
||||
friend ProxyMemoryMngr;
|
||||
|
||||
private:
|
||||
void update();
|
||||
|
||||
98
src/plugins/intel_cpu/src/cpu_tensor.cpp
Normal file
98
src/plugins/intel_cpu/src/cpu_tensor.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cpu_tensor.h"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
|
||||
#include "utils/debug_capabilities.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
// Wraps a plugin Memory object into an ITensor implementation (no data copy).
// Only the plain (ncsp) memory layout is accepted.
Tensor::Tensor(MemoryPtr memptr) : m_memptr{memptr} {
    OPENVINO_ASSERT(m_memptr != nullptr);

    const auto& desc = m_memptr->getDescPtr();
    // Reject blocked/permuted layouts up front: the tensor API assumes plain data.
    OPENVINO_ASSERT(desc->hasLayoutType(LayoutType::ncsp), "intel_cpu::Tensor only supports memory with ncsp layout.");

    m_element_type = InferenceEngine::details::convertPrecision(desc->getPrecision());
}
|
||||
|
||||
// Redefines the underlying memory descriptor to the requested shape.
// A no-op when the memory is already static with the same dimensions.
void Tensor::set_shape(ov::Shape new_shape) {
    auto desc = m_memptr->getDescPtr();
    const auto& shape = desc->getShape();
    if (shape.isStatic()) {
        DEBUG_LOG("tensor's memory object ", m_memptr.get(), ", ", vec2str(shape.getStaticDims()), " -> ", new_shape.to_string());
        if (shape.getStaticDims() == new_shape)
            return;
    }

    m_memptr->redefineDesc(desc->cloneWithNewDims(new_shape, true));
}
|
||||
|
||||
// Element type is fixed at construction from the memory descriptor precision.
const ov::element::Type& Tensor::get_element_type() const {
    return m_element_type;
}
|
||||
|
||||
// Returns the current static shape of the underlying memory; throws when the
// memory shape is still dynamic.
// NOTE(review): m_shape is refreshed under m_lock, but the returned reference
// escapes the lock, so a concurrent get_shape()/set_shape() caller may observe
// a partially updated cache — confirm the intended thread-safety contract.
const ov::Shape& Tensor::get_shape() const {
    auto& shape = m_memptr->getDescPtr()->getShape();
    OPENVINO_ASSERT(shape.isStatic(), "intel_cpu::Tensor has dynamic shape.");

    std::lock_guard<std::mutex> guard(m_lock);
    m_shape = ov::Shape{shape.getStaticDims()};
    return m_shape;
}
|
||||
|
||||
// Number of elements described by the current memory shape.
size_t Tensor::get_size() const {
    return m_memptr->getDesc().getShape().getElementsCount();
}
|
||||
|
||||
// Size in bytes of the currently allocated payload.
size_t Tensor::get_byte_size() const {
    return m_memptr->getDesc().getCurrentMemSize();
}
|
||||
|
||||
// Returns the byte strides of the underlying memory; requires a fully defined
// memory descriptor.
// NOTE(review): like get_shape(), the returned reference escapes m_lock, so a
// concurrent caller may observe m_strides mid-update — confirm the intended
// thread-safety contract.
const ov::Strides& Tensor::get_strides() const {
    OPENVINO_ASSERT(m_memptr->getDescPtr()->isDefined(), "intel_cpu::Tensor requires memory with defined strides.");

    std::lock_guard<std::mutex> guard(m_lock);
    update_strides();
    return m_strides;
}
|
||||
|
||||
void Tensor::update_strides() const {
|
||||
auto blocked_desc = m_memptr->getDescWithType<BlockedMemoryDesc>();
|
||||
OPENVINO_ASSERT(blocked_desc, "not a valid blocked memory descriptor.");
|
||||
auto& strides = blocked_desc->getStrides();
|
||||
m_strides.resize(strides.size());
|
||||
std::transform(strides.cbegin(), strides.cend(), m_strides.begin(),
|
||||
std::bind1st(std::multiplies<size_t>(), m_element_type.size()));
|
||||
}
|
||||
|
||||
// Returns the raw data pointer of the wrapped memory. When a concrete element
// type is requested it must match the tensor's own element type.
void* Tensor::data(const element::Type& element_type) const {
    const bool type_requested = element_type != element::undefined && element_type != element::dynamic;
    if (type_requested) {
        OPENVINO_ASSERT(element_type == get_element_type(),
                        "Tensor data with element type ",
                        get_element_type(),
                        ", is not representable as pointer to ",
                        element_type);
    }
    return m_memptr->getData();
}
|
||||
|
||||
/**
 * @brief Creates a tensor view over graph memory (zero-copy; the memory object
 * is wrapped, not duplicated)
 *
 * @param mem Memory object
 *
 * @return Shared pointer to tensor interface
 */
std::shared_ptr<ITensor> make_tensor(MemoryPtr mem) {
    return std::make_shared<Tensor>(mem);
}
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
48
src/plugins/intel_cpu/src/cpu_tensor.h
Normal file
48
src/plugins/intel_cpu/src/cpu_tensor.h
Normal file
@@ -0,0 +1,48 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/runtime/itensor.hpp"
|
||||
#include "cpu_memory.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
/**
 * @brief ITensor implementation that directly wraps a plugin Memory object,
 * enabling zero-copy exchange of data between the graph and infer requests.
 */
class Tensor : public ITensor {
public:
    // Only plain data format is supported.
    explicit Tensor(MemoryPtr memptr);

    // Redefines the underlying memory descriptor; no-op when unchanged.
    void set_shape(ov::Shape shape) override;

    const ov::element::Type& get_element_type() const override;

    // Requires the underlying memory shape to be static.
    const ov::Shape& get_shape() const override;

    size_t get_size() const override;

    size_t get_byte_size() const override;

    // Strides are expressed in bytes; requires a defined memory descriptor.
    const ov::Strides& get_strides() const override;

    void* data(const element::Type& type = {}) const override;

    // Exposes the wrapped memory object for plugin-internal use.
    MemoryPtr get_memory() {return m_memptr;}

private:
    // Recomputes m_strides (bytes) from the blocked memory descriptor.
    void update_strides() const;

    MemoryPtr m_memptr;

    // Cached at construction from the memory descriptor precision.
    ov::element::Type m_element_type;
    // Lazily refreshed caches; updates are guarded by m_lock.
    mutable ov::Shape m_shape;
    mutable ov::Strides m_strides;
    mutable std::mutex m_lock;
};
|
||||
|
||||
std::shared_ptr<ITensor> make_tensor(MemoryPtr mem);
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
@@ -515,6 +515,13 @@ EdgePtr Edge::getBaseEdge(int look) {
|
||||
if (edge->inPlace() && edge != edgesForSamePort[0]) return edge;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the first output edge as the base if there is no inPlace consumers
|
||||
// thus benefits zero-copy of outputs.
|
||||
for (auto edge : edgesForSamePort) {
|
||||
if (Type::Output == edge->getChild()->getType()) return edge;
|
||||
}
|
||||
|
||||
return edgesForSamePort[0];
|
||||
}
|
||||
|
||||
|
||||
@@ -812,8 +812,34 @@ void Graph::AllocateWithReuse() {
|
||||
}
|
||||
|
||||
if (!undefinedBoxes.empty()) {
|
||||
// Use proxy memory manager for output edges
|
||||
for (auto& box : undefinedBoxes) {
|
||||
for (auto& edge : edge_clusters[box.id]) {
|
||||
const auto child = edge->getChild();
|
||||
if (child->getType() == Type::Output &&
|
||||
edge->getStatus() == Edge::Status::NeedAllocation) {
|
||||
auto proxyMemMngr =
|
||||
std::make_shared<ProxyMemoryMngr>();
|
||||
DEBUG_LOG("ProxyMemoryMngr ", proxyMemMngr, " ", this);
|
||||
edge->allocate(proxyMemMngr);
|
||||
|
||||
// Store the output memory managers.
|
||||
// So that, the infer requests can be able to access them.
|
||||
int count = 0;
|
||||
for (auto &output : outputNodesMap) {
|
||||
if (output.second == child) {
|
||||
outputNodesMemMngrMap[output.first] = proxyMemMngr;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
// sometimes there are unused output ports.
|
||||
IE_ASSERT(count <= 1) << "cannot find output node. count " << count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!syncNodesInds.empty()) {
|
||||
//We have to extend the lifespan of thensors that are crossing a sync point border in order to save
|
||||
//We have to extend the lifespan of tensors that are crossing a sync point border in order to save
|
||||
//the intermediate computation results from possible loss due to the tensor resize
|
||||
std::vector<int> vecIntervals = {0};
|
||||
for (const auto& item : syncNodesInds) {
|
||||
@@ -990,6 +1016,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::
|
||||
}
|
||||
}
|
||||
|
||||
// suppose always being shared infer_request intel_cpu::Tensor to Graph if isDynamic.
|
||||
void Graph::PullOutputData(BlobMap &out) {
|
||||
if (!IsReady())
|
||||
IE_THROW() << "Wrong state. Topology not ready.";
|
||||
@@ -1006,6 +1033,8 @@ void Graph::PullOutputData(BlobMap &out) {
|
||||
IE_THROW(Unexpected) << "The CPU plugin graph doesn't contain output node with name: \"" << name << "\"";
|
||||
}
|
||||
|
||||
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()));
|
||||
|
||||
const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
|
||||
auto &expectedDesc = ext_blob->getTensorDesc();
|
||||
|
||||
@@ -1029,7 +1058,12 @@ void Graph::PullOutputData(BlobMap &out) {
|
||||
if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) {
|
||||
expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout());
|
||||
}
|
||||
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()),
|
||||
" dims ", PartialShape(out[name]->getTensorDesc().getDims()), " -> ", PartialShape(outDims),
|
||||
", intr ptr ", intr_blob.getData(), " , parentedge's memory object ", parentEdge->getMemoryPtr().get());
|
||||
out[name]->setShape(outDims);
|
||||
DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast<void*>(out[name]->buffer()),
|
||||
" dims ", PartialShape(out[name]->getTensorDesc().getDims()), ", intr ptr ", intr_blob.getData());
|
||||
}
|
||||
|
||||
// check for empty output blob
|
||||
@@ -1047,6 +1081,8 @@ void Graph::PullOutputData(BlobMap &out) {
|
||||
void *ext_blob_ptr = ext_blob->buffer();
|
||||
void *intr_blob_ptr = intr_blob.getData();
|
||||
|
||||
DEBUG_LOG(name, " @ ", intr_blob_ptr, " -> ", ext_blob_ptr, " zero-copy: ", intr_blob_ptr == ext_blob_ptr, " graph ", this, "\r\n");
|
||||
|
||||
// That is the same memory. No need to copy
|
||||
if (ext_blob_ptr == intr_blob_ptr) continue;
|
||||
|
||||
@@ -1313,13 +1349,12 @@ inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream)
|
||||
DUMP(node, getConfig().debugCaps, infer_count);
|
||||
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
|
||||
|
||||
DEBUG_LOG(*node);
|
||||
if (node->isDynamicNode()) {
|
||||
node->executeDynamic(stream);
|
||||
} else {
|
||||
node->execute(stream);
|
||||
}
|
||||
DEBUG_LOG(*node);
|
||||
}
|
||||
|
||||
void Graph::Infer(InferRequestBase* request) {
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
|
||||
#include "proxy_mem_mgr.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
@@ -190,6 +192,8 @@ public:
|
||||
return graphHasDynamicInput;
|
||||
}
|
||||
|
||||
Status getStatus() const {return status;}
|
||||
|
||||
protected:
|
||||
void VisitNode(NodePtr node, std::vector<NodePtr>& sortedNodes);
|
||||
|
||||
@@ -248,6 +252,8 @@ private:
|
||||
std::map<std::string, NodePtr> inputNodesMap;
|
||||
std::map<std::string, NodePtr> outputNodesMap;
|
||||
|
||||
std::unordered_map<std::string, ProxyMemoryMngrPtr> outputNodesMemMngrMap;
|
||||
|
||||
// these node pointers (from graphNodes) are to avoid regular checking for
|
||||
// constantness of nodes in Infer methods and calls of
|
||||
// non-executable (optimized out) nodes, such as Input, Reshape, etc.
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
#include "memory_desc/dnnl_blocked_memory_desc.h"
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include "proxy_mem_mgr.h"
|
||||
#include "openvino/runtime/make_tensor.hpp"
|
||||
#include <utils/general_utils.h>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
@@ -182,6 +185,13 @@ void InferRequestBase::InferImpl() {
|
||||
|
||||
ThrowIfCanceled();
|
||||
|
||||
// update output control blocks, if any, in order to refresh internal buffers
|
||||
if (Graph::Status::ReadyDynamic == graph->getStatus()) {
|
||||
for (auto&& item : outputControlBlocks) {
|
||||
item.second.update();
|
||||
}
|
||||
}
|
||||
|
||||
graph->PullOutputData(_outputs);
|
||||
}
|
||||
|
||||
@@ -202,93 +212,137 @@ static inline void changeEdgePtr(const EdgePtr &edge, InferenceEngine::Blob::Ptr
|
||||
}
|
||||
|
||||
void InferRequestBase::changeDefaultPtr() {
|
||||
const auto& inputNodesMap = graph->GetInputNodesMap();
|
||||
const auto& outputNodesMap = graph->GetOutputNodesMap();
|
||||
for (auto& it : externalPtr) {
|
||||
const auto& inputNodesMap = graph->GetInputNodesMap();
|
||||
auto input = inputNodesMap.find(it.first);
|
||||
if (input != inputNodesMap.end()) {
|
||||
NodePtr inputNodePtr = input->second;
|
||||
if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast<void*>(it.second->buffer()))
|
||||
continue;
|
||||
auto& childEdges = inputNodePtr->getChildEdges();
|
||||
// Perform checks that the user's memory will not be modified
|
||||
bool canBeInPlace = true;
|
||||
for (auto& childEdge : childEdges) {
|
||||
auto ce = childEdge.lock();
|
||||
if (!ce)
|
||||
if (inputNodesMap.end() == input) {
|
||||
OPENVINO_ASSERT(outputNodesMap.count(it.first), "Cannot find input/output blob: ", it.first);
|
||||
continue;
|
||||
}
|
||||
NodePtr inputNodePtr = input->second;
|
||||
if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast<void*>(it.second->buffer()))
|
||||
continue;
|
||||
auto& childEdges = inputNodePtr->getChildEdges();
|
||||
// Perform checks that the user's memory will not be modified
|
||||
bool canBeInPlace = true;
|
||||
for (auto& childEdge : childEdges) {
|
||||
auto ce = childEdge.lock();
|
||||
if (!ce)
|
||||
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
|
||||
|
||||
auto& child = ce->getChild();
|
||||
|
||||
if (child->isConstant()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// the input memory should be referenced by the children, otherwise it should be written to a
|
||||
// specific location
|
||||
if (ce->inPlace(Edge::LOOK_DOWN)) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto result = ce->modifiedInPlace()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (child->getType() == Type::Concatenation && child->isInPlace()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (canBeInPlace) {
|
||||
for (auto& edge : childEdges) {
|
||||
auto e = edge.lock();
|
||||
if (!e)
|
||||
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
|
||||
|
||||
auto& child = ce->getChild();
|
||||
|
||||
if (child->isConstant()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// the input memory should be referenced by the children, otherwise it should be written to a
|
||||
// specific location
|
||||
if (ce->inPlace(Edge::LOOK_DOWN)) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto result = ce->modifiedInPlace()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (child->getType() == Type::Concatenation && child->isInPlace()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
changeEdgePtr(e, it.second);
|
||||
}
|
||||
if (canBeInPlace) {
|
||||
for (auto& edge : childEdges) {
|
||||
auto e = edge.lock();
|
||||
if (!e)
|
||||
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
|
||||
}
|
||||
}
|
||||
|
||||
changeEdgePtr(e, it.second);
|
||||
}
|
||||
}
|
||||
for (auto& it : externalPtr) {
|
||||
const auto& name = it.first;
|
||||
auto output = outputNodesMap.find(name);
|
||||
if (outputNodesMap.end() == output) {
|
||||
continue;
|
||||
}
|
||||
auto parentEdge = output->second->getParentEdgeAt(0);
|
||||
|
||||
const auto& outputNodesMap = graph->GetOutputNodesMap();
|
||||
auto output = outputNodesMap.find(it.first);
|
||||
if (output != outputNodesMap.end()) {
|
||||
auto parentEdge = output->second->getParentEdgeAt(0);
|
||||
if (parentEdge->getMemory().getData() == static_cast<void*>(it.second->buffer()))
|
||||
continue;
|
||||
if (parentEdge->getMemory().getData() == static_cast<void*>(it.second->buffer()))
|
||||
continue;
|
||||
|
||||
bool canBeInPlace = true;
|
||||
void* defaultPtr = parentEdge->getMemory().getData();
|
||||
// Cannot be in-place after concat because concat is using different ptrs without offsets
|
||||
auto parent = parentEdge->getParent();
|
||||
NodePtr previousParent;
|
||||
do {
|
||||
previousParent = parent;
|
||||
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
|
||||
canBeInPlace = false;
|
||||
bool canBeInPlace = true;
|
||||
void* defaultPtr = parentEdge->getMemory().getData();
|
||||
// Cannot be in-place after concat because concat is using different ptrs without offsets
|
||||
auto parent = parentEdge->getParent();
|
||||
NodePtr previousParent;
|
||||
do {
|
||||
previousParent = parent;
|
||||
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
|
||||
canBeInPlace = false;
|
||||
break;
|
||||
}
|
||||
|
||||
auto& parentEdges = parent->getParentEdges();
|
||||
for (auto& edge : parentEdges) {
|
||||
auto e = edge.lock();
|
||||
if (!e)
|
||||
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
|
||||
|
||||
if (e->getMemory().getData() == defaultPtr) {
|
||||
parent = e->getParent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (previousParent != parent);
|
||||
if (canBeInPlace)
|
||||
changeEdgePtr(parentEdge, it.second);
|
||||
}
|
||||
|
||||
auto& parentEdges = parent->getParentEdges();
|
||||
for (auto& edge : parentEdges) {
|
||||
auto e = edge.lock();
|
||||
if (!e)
|
||||
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
|
||||
if (Graph::Status::ReadyDynamic == graph->getStatus()) {
|
||||
const auto &outMemMngrMap = graph->outputNodesMemMngrMap;
|
||||
for (auto&& item : outMemMngrMap) {
|
||||
const auto& name = item.first;
|
||||
|
||||
if (e->getMemory().getData() == defaultPtr) {
|
||||
parent = e->getParent();
|
||||
break;
|
||||
// share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryMngr instance.
|
||||
auto outputMemMngr = item.second;
|
||||
OPENVINO_ASSERT(outputMemMngr, "proxy mem manager for output ", name, " is empty.");
|
||||
|
||||
auto controlBlockItr = outputControlBlocks.find(name);
|
||||
|
||||
if (controlBlockItr != outputControlBlocks.end()) {
|
||||
auto output = outputNodesMap.find(name);
|
||||
OPENVINO_ASSERT(outputNodesMap.end() != output, "Node with name: ", name, " is absent in the outputNodesMap");
|
||||
auto parentEdge = output->second->getParentEdgeAt(0);
|
||||
//avoid cyclic memory use
|
||||
auto parentNode = parentEdge->getParent();
|
||||
const auto& parentNodeInpEdges = parentNode->getParentEdges();
|
||||
std::unordered_set<const void*> parentInputPtrs(parentNodeInpEdges.size());
|
||||
for (auto&& edge : parentNodeInpEdges) {
|
||||
if (auto edgePtr = edge.lock()) {
|
||||
parentInputPtrs.insert(edgePtr->getMemoryPtr()->getData());
|
||||
}
|
||||
}
|
||||
} while (previousParent != parent);
|
||||
if (canBeInPlace)
|
||||
changeEdgePtr(parentEdge, it.second);
|
||||
continue;
|
||||
|
||||
auto&& controlBlock = controlBlockItr->second;
|
||||
|
||||
std::shared_ptr<IMemoryMngr> memMngr = parentInputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output
|
||||
controlBlock.nextMemMngr() : // then swap internal buffer to avoid data corruption
|
||||
controlBlock.currentMemMngr(); // else reuse the existing buffer
|
||||
|
||||
outputMemMngr->setMemMngr(memMngr);
|
||||
DEBUG_LOG("reset proxy ", outputMemMngr, ", actual ", controlBlock.currentMemMngr(), " graph ", graph, " inferrequest ", this);
|
||||
DEBUG_LOG(name, ", blob ", controlBlock.blob(), ", tensor ", controlBlock.tensor());
|
||||
} else {
|
||||
outputMemMngr->reset(); // switch to the internal memory since memory sharing is no longer possible
|
||||
}
|
||||
}
|
||||
IE_THROW() << "Cannot find input/output blob: " << it.first;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -716,6 +770,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob:
|
||||
externalPtr.erase(name);
|
||||
}
|
||||
_outputs[name] = data;
|
||||
outputControlBlocks.erase(name); // now the memory is under user's control
|
||||
}
|
||||
}
|
||||
|
||||
@@ -774,22 +829,39 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
||||
if (_outputs.find(name) == _outputs.end()) {
|
||||
auto outputNode = modelOutputsMap.find(name);
|
||||
if (modelOutputsMap.find(name) != modelOutputsMap.end()) {
|
||||
const auto shape = outputNode->second->get_input_partial_shape(0);
|
||||
bool isDynamic = shape.is_dynamic();
|
||||
const auto& model_shape = outputNode->second->get_input_partial_shape(0);
|
||||
const auto& graph_shape = output->second->getInputShapeAtPort(0);
|
||||
|
||||
// WA, due to the transformations and constant folding, shape inference of the resulting model may
|
||||
// have static shapes, while they are dynamic in the initial representation
|
||||
const auto& shape = graph_shape.isDynamic() ? model_shape :
|
||||
(model_shape.is_dynamic() ? graph_shape.toPartialShape() : model_shape);
|
||||
|
||||
const bool isDynamic = shape.is_dynamic();
|
||||
|
||||
if (!data) {
|
||||
InferenceEngine::SizeVector dims;
|
||||
if (isDynamic) {
|
||||
dims = InferenceEngine::SizeVector(shape.rank().get_length(), 0);
|
||||
const auto model_prec = InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0));
|
||||
const auto graph_prec = output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision();
|
||||
OutputControlBlock control_block{model_prec, Shape{shape}};
|
||||
|
||||
DEBUG_LOG(name,
|
||||
", blob ", control_block.blob(),
|
||||
", tensor ", control_block.tensor(),
|
||||
", memmngr ", control_block.tensor()->get_memory()->getMemoryMngr(),
|
||||
"memory object ", control_block.tensor()->get_memory().get());
|
||||
|
||||
data = control_block.blob();
|
||||
if (model_prec == graph_prec) outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
|
||||
} else {
|
||||
dims = shape.to_shape();
|
||||
|
||||
InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)),
|
||||
dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size()));
|
||||
data = make_blob_with_precision(desc);
|
||||
data->allocate();
|
||||
}
|
||||
|
||||
InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)),
|
||||
dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size()));
|
||||
|
||||
data = make_blob_with_precision(desc);
|
||||
data->allocate();
|
||||
} else {
|
||||
const auto& blobDims = data->getTensorDesc().getDims();
|
||||
// in static shape case is enough information that shapes are incompatible to throw exception
|
||||
@@ -831,9 +903,23 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
||||
IE_THROW() << "Cannot find blob with name: " << name;
|
||||
}
|
||||
|
||||
DEBUG_LOG(name, ", blob ", data, ", ", static_cast<void*>(data->buffer()));
|
||||
return data;
|
||||
}
|
||||
|
||||
void InferRequest::checkBlobs() {
|
||||
for (auto const& input : _inputs) {
|
||||
checkBlob(input.second, input.first, true);
|
||||
}
|
||||
|
||||
// won't check dynamic output blobs as they are not allocated.
|
||||
for (auto const& output : _outputs) {
|
||||
const auto out_node = findOutputByNodeName(output.first);
|
||||
const auto isDynamic = out_node && out_node->get_output_partial_shape(0).is_dynamic();
|
||||
if (!isDynamic) checkBlob(output.second, output.first, false);
|
||||
}
|
||||
}
|
||||
|
||||
void InferRequest::PushInputData() {
|
||||
for (auto input : _inputs) {
|
||||
auto inputName = input.first;
|
||||
@@ -845,5 +931,22 @@ void InferRequest::PushInputData() {
|
||||
}
|
||||
}
|
||||
|
||||
InferRequestBase::OutputControlBlock::OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape) {
|
||||
dnnl::engine eng(dnnl::engine::kind::cpu, 0);
|
||||
m_buffers[m_buffIndx] = std::make_shared<MemoryMngrWithReuse>();
|
||||
m_proxyMemMngr = std::make_shared<ProxyMemoryMngr>(m_buffers[m_buffIndx]);
|
||||
|
||||
Shape memShape = shape.isDynamic() ?
|
||||
Shape{VectorDims(shape.getRank(), 0)} : // this is a WA since the ITensor doesn't allow dyn shapes
|
||||
Shape{shape};
|
||||
|
||||
CpuBlockedMemoryDescPtr desc =
|
||||
std::make_shared<CpuBlockedMemoryDesc>(precision, memShape);
|
||||
|
||||
auto memory = std::make_shared<Memory>(eng, desc, m_proxyMemMngr);
|
||||
m_tensor = std::make_shared<Tensor>(memory);
|
||||
m_blob = tensor_to_blob({m_tensor, nullptr});
|
||||
}
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
||||
#include "cpu_tensor.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
@@ -52,12 +53,65 @@ protected:
|
||||
InferenceEngine::Precision normToInputSupportedPrec(const std::pair<const std::string, InferenceEngine::Blob::Ptr>& input) const;
|
||||
void pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision dataType);
|
||||
|
||||
protected:
|
||||
// Holds the double-buffered storage for a single model output: two
// MemoryMngrWithReuse buffers are swapped behind a ProxyMemoryMngr so the
// graph can write the next result while the user still holds the previous one.
class OutputControlBlock {
public:
    using MemMngrPtr = std::shared_ptr<MemoryMngrWithReuse>;

public:
    OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape);

    OutputControlBlock(const OutputControlBlock&) = delete;
    OutputControlBlock& operator=(const OutputControlBlock&) = delete;

    OutputControlBlock(OutputControlBlock&&) = default;
    OutputControlBlock& operator=(OutputControlBlock&&) = default;

    // Blob view over the tensor, handed out through the IE blob API.
    InferenceEngine::Blob::Ptr blob() const {
        return m_blob;
    }

    std::shared_ptr<Tensor> tensor() const {
        return m_tensor;
    }

    // Raw data pointer of the active buffer; used to detect in/out aliasing.
    const void* rawPtr() const {
        return m_tensor->get_memory()->getData();
    }

    MemMngrPtr currentMemMngr() const {
        return m_buffers[m_buffIndx];
    }

    // Switches to the other buffer, allocating it lazily on first use.
    MemMngrPtr nextMemMngr() {
        m_buffIndx ^= 0x1;
        if (!m_buffers[m_buffIndx]) {
            m_buffers[m_buffIndx] = std::make_shared<MemoryMngrWithReuse>();
        }
        return m_buffers[m_buffIndx];
    }

    // Re-attaches the proxy to the currently selected buffer.
    void update() {
        m_proxyMemMngr->setMemMngr(currentMemMngr());
    }

private:
    std::shared_ptr<Tensor> m_tensor = nullptr;
    InferenceEngine::Blob::Ptr m_blob = nullptr;
    ProxyMemoryMngrPtr m_proxyMemMngr = nullptr;
    // Two buffers indexed by m_buffIndx (toggled via XOR in nextMemMngr()).
    std::array<MemMngrPtr, 2> m_buffers;
    int m_buffIndx = 0;
};
|
||||
|
||||
protected:
|
||||
virtual void initBlobs() = 0;
|
||||
virtual void PushInputData() = 0;
|
||||
|
||||
Graph* graph = nullptr;
|
||||
std::unordered_map<std::string, InferenceEngine::Blob::Ptr> externalPtr;
|
||||
|
||||
std::unordered_map<std::string, OutputControlBlock> outputControlBlocks;
|
||||
|
||||
private:
|
||||
void PushStates();
|
||||
void PullStates();
|
||||
@@ -97,6 +151,8 @@ public:
|
||||
void SetBlobsImpl(const std::string& name, const InferenceEngine::BatchedBlob::Ptr& batched_blob) override;
|
||||
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
|
||||
|
||||
void checkBlobs() override;
|
||||
|
||||
private:
|
||||
void PushInputData() override;
|
||||
void initBlobs() override;
|
||||
|
||||
@@ -338,9 +338,6 @@ void Reorder::execute(dnnl::stream strm) {
|
||||
} else if (canUseNcsp2Nspc) {
|
||||
optimizedNcsp2Nspc();
|
||||
} else {
|
||||
// src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData());
|
||||
// dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData());
|
||||
|
||||
if (prim) {
|
||||
prim.execute(strm, primArgs);
|
||||
} else {
|
||||
|
||||
74
src/plugins/intel_cpu/src/proxy_mem_mgr.cpp
Normal file
74
src/plugins/intel_cpu/src/proxy_mem_mgr.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "proxy_mem_mgr.h"
|
||||
#include "utils/debug_capabilities.h"
|
||||
|
||||
using namespace ov::intel_cpu;
|
||||
|
||||
// Redirects the proxy to a different underlying memory manager, sizing it to
// the last requested size and notifying all registered Memory objects.
void ProxyMemoryMngr::setMemMngr(std::shared_ptr<IMemoryMngr> pMngr) {
    OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object");
    if (m_pMngr == pMngr)
        return;

    m_pMngr = std::move(pMngr);
    m_pMngr->resize(m_size);
    notifyUpdate();
}
|
||||
|
||||
void ProxyMemoryMngr::reset() {
|
||||
if (!m_pOrigMngr) {
|
||||
m_pOrigMngr = std::make_shared<MemoryMngrWithReuse>();
|
||||
}
|
||||
|
||||
if (m_pMngr == m_pOrigMngr) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_pMngr = m_pOrigMngr;
|
||||
m_pMngr->resize(m_size);
|
||||
notifyUpdate();
|
||||
}
|
||||
|
||||
// Forwards to the currently active memory manager.
void* ProxyMemoryMngr::getRawPtr() const noexcept {
    return m_pMngr->getRawPtr();
}
|
||||
|
||||
// Attaches an external buffer to the active manager and notifies observers.
// NOTE(review): unlike resize(), m_size is not refreshed here — confirm
// whether a subsequent setMemMngr()/reset() should re-apply this size.
void ProxyMemoryMngr::setExtBuff(void* ptr, size_t size) {
    m_pMngr->setExtBuff(ptr, size);
    notifyUpdate();
}
|
||||
|
||||
// Resizes the active manager, remembers the requested size (so a future
// manager swap can be resized to match), and notifies observers.
bool ProxyMemoryMngr::resize(size_t size) {
    const bool resized = m_pMngr->resize(size);
    DEBUG_LOG(this, ", ", m_pMngr, " size ", m_size, " -> ", size, " resized? ", resized, " RawPtr ", getRawPtr());
    m_size = size;
    notifyUpdate();
    return resized;
}
|
||||
|
||||
// Reports whether the active manager currently wraps an external buffer.
bool ProxyMemoryMngr::hasExtBuffer() const noexcept {
    return m_pMngr->hasExtBuffer();
}
|
||||
|
||||
// Subscribes a Memory object for update notifications; null pointers ignored.
void ProxyMemoryMngr::registerMemory(Memory* memPtr) {
    if (memPtr != nullptr) {
        m_setMemPtrs.insert(memPtr);
    }
}
|
||||
|
||||
void ProxyMemoryMngr::unregisterMemory(Memory* memPtr) {
|
||||
if (memPtr) {
|
||||
m_setMemPtrs.erase(memPtr);
|
||||
}
|
||||
}
|
||||
|
||||
void ProxyMemoryMngr::notifyUpdate() {
|
||||
for (auto& item : m_setMemPtrs) {
|
||||
if (item) {
|
||||
item->update();
|
||||
}
|
||||
}
|
||||
}
|
||||
52
src/plugins/intel_cpu/src/proxy_mem_mgr.h
Normal file
52
src/plugins/intel_cpu/src/proxy_mem_mgr.h
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cpu_memory.h"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
/**
|
||||
* @brief A proxy object that additionally implements observer pattern
|
||||
*/
|
||||
class ProxyMemoryMngr : public IMemoryMngrObserver {
|
||||
public:
|
||||
ProxyMemoryMngr() : m_pOrigMngr(std::make_shared<MemoryMngrWithReuse>()), m_pMngr(m_pOrigMngr) {}
|
||||
explicit ProxyMemoryMngr(std::shared_ptr<IMemoryMngr> pMngr) {
|
||||
OPENVINO_ASSERT(pMngr, "Memory manager is uninitialized");
|
||||
m_pMngr = pMngr;
|
||||
}
|
||||
|
||||
void* getRawPtr() const noexcept override;
|
||||
void setExtBuff(void* ptr, size_t size) override;
|
||||
bool resize(size_t size) override;
|
||||
bool hasExtBuffer() const noexcept override;
|
||||
|
||||
void registerMemory(Memory* memPtr) override;
|
||||
void unregisterMemory(Memory* memPtr) override;
|
||||
|
||||
void setMemMngr(std::shared_ptr<IMemoryMngr> pMngr);
|
||||
void reset();
|
||||
|
||||
private:
|
||||
void notifyUpdate();
|
||||
|
||||
// We keep the original MemMngr as may fallback to copy output.
|
||||
std::shared_ptr<IMemoryMngr> m_pOrigMngr = nullptr;
|
||||
std::shared_ptr<IMemoryMngr> m_pMngr = nullptr;
|
||||
|
||||
std::unordered_set<Memory*> m_setMemPtrs;
|
||||
|
||||
// WA: resize stage might not work because there is no shape change,
|
||||
// but the underlying actual memory manager changes.
|
||||
size_t m_size = 0ul;
|
||||
};
|
||||
|
||||
using ProxyMemoryMngrPtr = std::shared_ptr<ProxyMemoryMngr>;
|
||||
using ProxyMemoryMngrCPtr = std::shared_ptr<const ProxyMemoryMngr>;
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
@@ -29,7 +29,7 @@ bool BrgemmBlocking::run(snippets::lowered::LinearIR& linear_ir) {
|
||||
|
||||
|
||||
const auto& loop_manager = linear_ir.get_loop_manager();
|
||||
const auto dim_idx = 1;
|
||||
const size_t dim_idx = 1;
|
||||
|
||||
auto blocking_loop_exists = [&](const ov::snippets::lowered::ExpressionPtr& expr,
|
||||
const std::shared_ptr<ov::intel_cpu::BrgemmCPU>& brgemm) {
|
||||
|
||||
@@ -254,7 +254,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) {
|
||||
} else {
|
||||
// no SPD yet, use orginal shapes
|
||||
comma = "";
|
||||
for (int i = 0; i < node.getOriginalOutputPrecisions().size(); i++) {
|
||||
for (size_t i = 0; i < node.getOriginalOutputPrecisions().size(); i++) {
|
||||
auto shape = node.getOutputShapeAtPort(i);
|
||||
std::string prec_name = "Undef";
|
||||
prec_name = node.getOriginalOutputPrecisionAtPort(i).name();
|
||||
@@ -282,6 +282,10 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) {
|
||||
auto n = edge->getParent();
|
||||
os << comma;
|
||||
os << node_id(*edge->getParent());
|
||||
auto ptr = edge->getMemoryPtr();
|
||||
if (ptr) {
|
||||
os << "_" << ptr->getData();
|
||||
}
|
||||
if (!is_single_output_port(*n))
|
||||
os << "[" << edge->getInputNum() << "]";
|
||||
comma = ",";
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <limits.h>
|
||||
#include "behavior/ov_infer_request/iteration_chaining.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace ov::test::behavior;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<ov::AnyMap> configs = {
|
||||
{}
|
||||
};
|
||||
|
||||
const std::vector<ov::AnyMap> HeteroConfigs = {
|
||||
{ov::device::priorities(CommonTestUtils::DEVICE_CPU)}
|
||||
};
|
||||
|
||||
const std::vector<ov::AnyMap> AutoConfigs = {
|
||||
{ov::device::priorities(CommonTestUtils::DEVICE_CPU)}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::ValuesIn(configs)),
|
||||
OVIterationChaining::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Hetero_BehaviorTests, OVIterationChaining,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_HETERO),
|
||||
::testing::ValuesIn(HeteroConfigs)),
|
||||
OVIterationChaining::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, OVIterationChaining,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_AUTO),
|
||||
::testing::ValuesIn(AutoConfigs)),
|
||||
OVIterationChaining::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -90,7 +90,8 @@ protected:
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
|
||||
auto customOp = std::make_shared<CustomOp>(paramOuts);
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(customOp)};
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(customOp->output(0)),
|
||||
std::make_shared<ngraph::opset3::Result>(customOp->output(1))};
|
||||
function = std::make_shared<ngraph::Function>(results, inputParams, "customOpTest");
|
||||
}
|
||||
|
||||
|
||||
258
src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp
Normal file
258
src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp
Normal file
@@ -0,0 +1,258 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gmock/gmock-spec-builders.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest-param-test.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/core/shape.hpp>
|
||||
#include <openvino/core/strides.hpp>
|
||||
#include <openvino/core/type/element_type.hpp>
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
|
||||
#include "cpu_memory.h"
|
||||
#include "cpu_tensor.h"
|
||||
#include "openvino/runtime/itensor.hpp"
|
||||
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
|
||||
using namespace ov::intel_cpu;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
using CPUTensorTest = ::testing::Test;
|
||||
|
||||
class MockBlockedMemoryDesc : public BlockedMemoryDesc {
|
||||
public:
|
||||
MockBlockedMemoryDesc(const Shape& _shape) : MemoryDesc(_shape, Blocked) {}
|
||||
|
||||
MOCK_METHOD(InferenceEngine::Precision, getPrecision, (), (const, override));
|
||||
MOCK_METHOD(MemoryDescPtr, clone, (), (const, override));
|
||||
MOCK_METHOD(size_t, getOffsetPadding, (), (const, override));
|
||||
|
||||
MOCK_METHOD(MemoryDescPtr, cloneWithNewDimsImp, (const VectorDims&), (const, override));
|
||||
|
||||
MOCK_METHOD(MemoryDescPtr, cloneWithNewPrecision, (const InferenceEngine::Precision), (const, override));
|
||||
MOCK_METHOD(bool, isCompatible, (const MemoryDesc&), (const, override));
|
||||
|
||||
MOCK_METHOD(bool, hasLayoutType, (LayoutType), (const, override));
|
||||
|
||||
MOCK_METHOD(size_t, getMaxMemSize, (), (const, override));
|
||||
|
||||
MOCK_METHOD(const VectorDims&, getBlockDims, (), (const, override));
|
||||
MOCK_METHOD(const VectorDims&, getOrder, (), (const, override));
|
||||
MOCK_METHOD(const VectorDims&, getOffsetPaddingToData, (), (const, override));
|
||||
MOCK_METHOD(const VectorDims&, getStrides, (), (const, override));
|
||||
MOCK_METHOD(bool, blocksExtended, (), (const, override));
|
||||
MOCK_METHOD(size_t, getPaddedElementsCount, (), (const, override));
|
||||
MOCK_METHOD(bool, isCompatible, (const BlockedMemoryDesc &, CmpMask), (const, override));
|
||||
|
||||
MOCK_METHOD(void, setPrecision, (InferenceEngine::Precision), (override));
|
||||
|
||||
MOCK_METHOD(size_t, getCurrentMemSizeImp, (), (const, override));
|
||||
|
||||
MOCK_METHOD(size_t, getElementOffset, (size_t), (const, override));
|
||||
MOCK_METHOD(bool, canComputeMemSizeZeroDims, (), (const, override));
|
||||
MOCK_METHOD(bool, isDefinedImp, (), (const, override));
|
||||
};
|
||||
|
||||
class MockIMemory : public IMemory {
|
||||
public:
|
||||
MockIMemory(MemoryDescPtr desc) : m_pMemDesc(desc) {}
|
||||
MockIMemory(const MemoryDesc& desc) : m_pMemDesc(desc.clone()) {}
|
||||
|
||||
MOCK_METHOD(bool, isAllocated, (), (const, noexcept, override));
|
||||
MOCK_METHOD(MemoryDesc&, getDesc, (), (const, override));
|
||||
MOCK_METHOD(MemoryDescPtr, getDescPtr, (), (const, override));
|
||||
|
||||
MOCK_METHOD(size_t, getSize, (), (const, override));
|
||||
MOCK_METHOD(const Shape&, getShape, (), (const, override));
|
||||
MOCK_METHOD(const VectorDims&, getStaticDims, (), (const, override));
|
||||
|
||||
MOCK_METHOD(void, redefineDesc, (MemoryDescPtr), (override));
|
||||
MOCK_METHOD(void, load, (const IMemory&, bool), (const, override));
|
||||
MOCK_METHOD(MemoryMngrPtr, getMemoryMngr, (), (const, override));
|
||||
|
||||
MOCK_METHOD(dnnl::memory, getPrimitive, (), (const, override));
|
||||
MOCK_METHOD(void, nullify, (), (override));
|
||||
MOCK_METHOD(void*, getData, (), (const, override));
|
||||
|
||||
void set_memDesc(MemoryDescPtr memdesc) { m_pMemDesc = memdesc; }
|
||||
void set_memDesc(const MemoryDesc& memdesc) { m_pMemDesc = memdesc.clone(); }
|
||||
MemoryDesc& get_memDesc() const { return *m_pMemDesc; }
|
||||
MemoryDescPtr get_memDescPtr() { return m_pMemDesc; }
|
||||
|
||||
private:
|
||||
MemoryDescPtr m_pMemDesc;
|
||||
};
|
||||
|
||||
// helper to get byte strides from strides.
|
||||
static ov::Strides byte_strides(const ov::Strides& strides, const ov::element::Type& type) {
|
||||
ov::Strides byte_strides(strides.size());
|
||||
for (size_t i = 0; i < strides.size(); ++i)
|
||||
byte_strides[i] = strides[i] * type.size();
|
||||
return byte_strides;
|
||||
}
|
||||
|
||||
// helper to create Memory of ncsp layout.
|
||||
inline MemoryDescPtr create_memdesc(Precision prec, const Shape& shape, const VectorDims& strides = {}) {
|
||||
ov::Shape ov_shape = shape.toPartialShape().to_shape();
|
||||
const std::size_t totalSize = ov::shape_size(ov_shape);
|
||||
auto elem_type = InferenceEngine::details::convertPrecision(prec);
|
||||
|
||||
auto memdesc = std::make_shared<MockBlockedMemoryDesc>(shape);
|
||||
::testing::Mock::AllowLeak(memdesc.get());
|
||||
|
||||
EXPECT_CALL(*memdesc, hasLayoutType(::testing::Eq(LayoutType::ncsp))).WillRepeatedly(::testing::Return(true));
|
||||
|
||||
EXPECT_CALL(*memdesc, getPrecision).WillRepeatedly(::testing::Return(prec));
|
||||
EXPECT_CALL(*memdesc, getStrides).WillRepeatedly(::testing::ReturnRef(strides));
|
||||
|
||||
EXPECT_CALL(*memdesc, canComputeMemSizeZeroDims).WillRepeatedly(::testing::Return(true));
|
||||
EXPECT_CALL(*memdesc, isDefinedImp).WillRepeatedly(::testing::Return(true));
|
||||
EXPECT_CALL(*memdesc, getCurrentMemSizeImp).WillRepeatedly(::testing::Return(totalSize * elem_type.size()));
|
||||
|
||||
return memdesc;
|
||||
}
|
||||
|
||||
inline MemoryPtr create_memory(MemoryDescPtr memdesc) {
|
||||
auto memptr = std::make_shared<MockIMemory>(memdesc);
|
||||
::testing::Mock::AllowLeak(memptr.get());
|
||||
|
||||
// getDesc
|
||||
EXPECT_CALL(*memptr, getDescPtr)
|
||||
.Times(::testing::AnyNumber())
|
||||
.WillRepeatedly([memptr]() {
|
||||
return memptr->get_memDescPtr();
|
||||
});
|
||||
EXPECT_CALL(*memptr, getDesc).WillRepeatedly(::testing::ReturnRef(memptr->get_memDesc()));
|
||||
|
||||
// data
|
||||
static size_t memSize = 0;
|
||||
EXPECT_CALL(*memptr, getData)
|
||||
.WillRepeatedly([memptr]() {
|
||||
auto memdesc = memptr->get_memDescPtr();
|
||||
auto required = memdesc->getCurrentMemSize();
|
||||
if (memSize >= required) {
|
||||
return reinterpret_cast<void*>(memSize);
|
||||
} else {
|
||||
memSize = required;
|
||||
return reinterpret_cast<void*>(required);
|
||||
}
|
||||
});
|
||||
|
||||
// redefineDesc
|
||||
ON_CALL(*memptr, redefineDesc).WillByDefault([memptr](MemoryDescPtr desc) {
|
||||
memptr->set_memDesc(desc);
|
||||
});
|
||||
EXPECT_CALL(*memptr, redefineDesc).Times(::testing::AtLeast(1));
|
||||
|
||||
return memptr;
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorTest, canCreateTensor) {
|
||||
Shape shape{4, 3, 2};
|
||||
ov::Shape ov_shape = shape.toPartialShape().to_shape();
|
||||
auto strides = ov::Strides({6, 2, 1});
|
||||
const std::size_t totalSize = ov::shape_size(ov_shape);
|
||||
ov::element::Type elem_type = ov::element::f32;
|
||||
|
||||
auto memptr = create_memory(create_memdesc(Precision::FP32, shape, strides));
|
||||
{
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
|
||||
ASSERT_EQ(totalSize, t->get_size());
|
||||
ASSERT_NE(nullptr, t->data());
|
||||
ASSERT_EQ(elem_type, t->get_element_type());
|
||||
ASSERT_EQ(ov_shape, t->get_shape());
|
||||
ASSERT_NE(ov_shape, t->get_strides());
|
||||
ASSERT_EQ(byte_strides(ov::Strides({6, 2, 1}), t->get_element_type()), t->get_strides());
|
||||
ASSERT_EQ(elem_type.size() * totalSize, t->get_byte_size());
|
||||
ASSERT_THROW(t->data(ov::element::i64), ov::Exception);
|
||||
ASSERT_THROW(t->data<std::int32_t>(), ov::Exception);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorTest, canAccessF16Tensor) {
|
||||
Shape shape = {4, 3, 2};
|
||||
auto strides = ov::Strides({6, 2, 1});
|
||||
|
||||
auto memptr = create_memory(create_memdesc(Precision::FP16, shape, strides));
|
||||
{
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
|
||||
EXPECT_NE(nullptr, t->data());
|
||||
ASSERT_EQ(ov::element::f16, t->get_element_type());
|
||||
EXPECT_NO_THROW(t->data(ov::element::f16));
|
||||
EXPECT_NO_THROW(t->data<ov::float16>());
|
||||
EXPECT_THROW(t->data<ov::bfloat16>(), ov::Exception);
|
||||
EXPECT_THROW(t->data<std::uint16_t>(), ov::Exception);
|
||||
EXPECT_THROW(t->data<std::int16_t>(), ov::Exception);
|
||||
}
|
||||
}
|
||||
|
||||
// SetShape
|
||||
TEST_F(CPUTensorTest, canSetShape) {
|
||||
const Shape origShape = {1, 2, 3};
|
||||
const ov::Shape ov_origShape = origShape.toPartialShape().to_shape();
|
||||
auto strides = ov::Strides({6, 3, 1});
|
||||
auto memdesc = create_memdesc(Precision::FP32, origShape, strides);
|
||||
auto memptr = create_memory(memdesc);
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
|
||||
|
||||
const Shape newShape({4, 5, 6});
|
||||
const ov::Shape ov_newShape = newShape.toPartialShape().to_shape();
|
||||
auto new_strides = ov::Strides{30, 6, 1};
|
||||
auto new_memdesc = create_memdesc(Precision::FP32, newShape, new_strides);
|
||||
|
||||
// set_shape to a bigger memory
|
||||
{
|
||||
auto blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(memdesc.get());
|
||||
EXPECT_CALL(*blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(new_memdesc));
|
||||
|
||||
const void* orig_data = t->data();
|
||||
ASSERT_EQ(t->get_shape(), ov_origShape);
|
||||
ASSERT_NO_THROW(t->set_shape(ov_newShape));
|
||||
ASSERT_EQ(ov_newShape, t->get_shape());
|
||||
ASSERT_EQ(byte_strides(ov::row_major_strides(ov_newShape), t->get_element_type()), t->get_strides());
|
||||
ASSERT_NE(orig_data, t->data());
|
||||
}
|
||||
|
||||
// set_shape for smaller memory - does not perform reallocation
|
||||
{
|
||||
auto new_blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(new_memdesc.get());
|
||||
EXPECT_CALL(*new_blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(memdesc));
|
||||
const void* orig_data = t->data();
|
||||
t->set_shape(ov_origShape);
|
||||
ASSERT_EQ(ov_origShape, t->get_shape());
|
||||
ASSERT_EQ(orig_data, t->data());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorTest, canSyncMemoryAndTensor) {
|
||||
const Shape origShape = {1, 2, 3};
|
||||
const ov::Shape ov_origShape = origShape.toPartialShape().to_shape();
|
||||
auto strides = ov::Strides({6, 3, 1});
|
||||
auto memdesc = create_memdesc(Precision::FP32, origShape, strides);
|
||||
auto memptr = create_memory(memdesc);
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
|
||||
|
||||
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
|
||||
ASSERT_EQ(byte_strides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
|
||||
|
||||
const Shape newShape({4, 5, 6});
|
||||
const ov::Shape ov_newShape = newShape.toPartialShape().to_shape();
|
||||
auto new_strides = ov::Strides{30, 6, 1};
|
||||
auto new_memdesc = create_memdesc(Precision::FP32, newShape, new_strides);
|
||||
|
||||
// reallocate memory out boundary of tensor instance
|
||||
{
|
||||
auto blocked_memdesc = dynamic_cast<MockBlockedMemoryDesc*>(memdesc.get());
|
||||
EXPECT_CALL(*blocked_memdesc, cloneWithNewDimsImp).WillRepeatedly(::testing::Return(new_memdesc));
|
||||
|
||||
auto desc2 = memptr->getDescPtr()->cloneWithNewDims(newShape.getStaticDims(), true);
|
||||
memptr->redefineDesc(desc2);
|
||||
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
|
||||
ASSERT_EQ(byte_strides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
|
||||
}
|
||||
}
|
||||
156
src/plugins/intel_cpu/tests/unit/cpu_tensor_test_ext.cpp
Normal file
156
src/plugins/intel_cpu/tests/unit/cpu_tensor_test_ext.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gmock/gmock-spec-builders.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest-param-test.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/core/shape.hpp>
|
||||
#include <openvino/core/strides.hpp>
|
||||
#include <openvino/core/type/element_type.hpp>
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
|
||||
#include "cpu_memory.h"
|
||||
#include "cpu_tensor.h"
|
||||
#include "openvino/runtime/itensor.hpp"
|
||||
|
||||
using namespace ov::intel_cpu;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
using CPUTensorExtTest = ::testing::Test;
|
||||
|
||||
static ov::Strides byteStrides(const ov::Strides& strides, const ov::element::Type& type) {
|
||||
ov::Strides byte_strides(strides.size());
|
||||
for (size_t i = 0; i < strides.size(); ++i)
|
||||
byte_strides[i] = strides[i] * type.size();
|
||||
return byte_strides;
|
||||
}
|
||||
|
||||
inline MemoryPtr create_memory(Precision prc, const Shape& shape) {
|
||||
dnnl::engine eng(dnnl::engine::kind::cpu, 0);
|
||||
CpuBlockedMemoryDescPtr desc;
|
||||
desc = std::make_shared<CpuBlockedMemoryDesc>(prc, shape);
|
||||
return std::make_shared<Memory>(eng, desc);
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorExtTest, canCreateTensor) {
|
||||
Shape shape{4, 3, 2};
|
||||
ov::Shape ov_shape = shape.toPartialShape().to_shape();
|
||||
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape));
|
||||
const std::size_t totalSize = ov::shape_size(ov_shape);
|
||||
ASSERT_EQ(totalSize, t->get_size());
|
||||
ASSERT_NE(nullptr, t->data());
|
||||
ASSERT_EQ(ov::element::f32, t->get_element_type());
|
||||
ASSERT_EQ(ov_shape, t->get_shape());
|
||||
ASSERT_NE(ov_shape, t->get_strides());
|
||||
ASSERT_EQ(byteStrides(ov::Strides({6, 2, 1}), t->get_element_type()), t->get_strides());
|
||||
ASSERT_EQ(ov::element::f32.size() * totalSize, t->get_byte_size());
|
||||
ASSERT_THROW(t->data(ov::element::i64), ov::Exception);
|
||||
ASSERT_THROW(t->data<std::int32_t>(), ov::Exception);
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorExtTest, canAccessF16Tensor) {
|
||||
Shape shape = {4, 3, 2};
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP16, shape));
|
||||
EXPECT_NE(nullptr, t->data());
|
||||
ASSERT_EQ(ov::element::f16, t->get_element_type());
|
||||
EXPECT_NO_THROW(t->data(ov::element::f16));
|
||||
EXPECT_NO_THROW(t->data<ov::float16>());
|
||||
EXPECT_THROW(t->data<ov::bfloat16>(), ov::Exception);
|
||||
EXPECT_THROW(t->data<std::uint16_t>(), ov::Exception);
|
||||
EXPECT_THROW(t->data<std::int16_t>(), ov::Exception);
|
||||
}
|
||||
|
||||
// SetShape
|
||||
TEST_F(CPUTensorExtTest, canSetShape) {
|
||||
const ov::Shape origShape({1, 2, 3});
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, {1, 2, 3}));
|
||||
const ov::Shape newShape({4, 5, 6});
|
||||
|
||||
const void* orig_data = t->data();
|
||||
ASSERT_EQ(t->get_shape(), origShape);
|
||||
ASSERT_NO_THROW(t->set_shape({4, 5, 6}));
|
||||
ASSERT_EQ(newShape, t->get_shape());
|
||||
ASSERT_EQ(byteStrides(ov::row_major_strides(newShape), t->get_element_type()), t->get_strides());
|
||||
ASSERT_NE(orig_data, t->data());
|
||||
|
||||
// set_shape for smaller memory - does not perform reallocation
|
||||
{
|
||||
orig_data = t->data();
|
||||
t->set_shape(origShape);
|
||||
ASSERT_EQ(origShape, t->get_shape());
|
||||
ASSERT_EQ(orig_data, t->data());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorExtTest, emptySize) {
|
||||
ov::PartialShape pshape{0, 3, 2};
|
||||
Shape shape{pshape};
|
||||
const ov::Shape origShape({0, 3, 2});
|
||||
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape));
|
||||
|
||||
ASSERT_EQ(ov::element::f32, t->get_element_type());
|
||||
ASSERT_EQ(0, t->get_size());
|
||||
ASSERT_EQ(0, t->get_byte_size());
|
||||
ASSERT_EQ(origShape, t->get_shape());
|
||||
ASSERT_EQ(byteStrides(ov::Strides({0, 0, 0}), t->get_element_type()), t->get_strides());
|
||||
EXPECT_NO_THROW(t->data());
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorExtTest, canCreateTensorWithDynamicShape) {
|
||||
ov::PartialShape pshape{-1, 3, 2};
|
||||
Shape shape{pshape};
|
||||
|
||||
std::shared_ptr<ov::ITensor> t;
|
||||
|
||||
// construct with memory with dynamic shape
|
||||
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, shape)));
|
||||
ASSERT_THROW(t->get_shape(), ov::Exception);
|
||||
ASSERT_THROW(t->get_strides(), ov::Exception);
|
||||
|
||||
// change memory to dynamic shape
|
||||
{
|
||||
auto memptr = create_memory(Precision::FP32, {4, 3, 2});
|
||||
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(memptr));
|
||||
|
||||
ov::PartialShape pshape{{1, 10}, 3, 2};
|
||||
CpuBlockedMemoryDescPtr desc2 = std::make_shared<CpuBlockedMemoryDesc>(Precision::FP32, Shape(pshape));
|
||||
memptr->redefineDesc(desc2);
|
||||
ASSERT_THROW(t->get_shape(), ov::Exception);
|
||||
ASSERT_THROW(t->get_strides(), ov::Exception);
|
||||
}
|
||||
|
||||
// set_shape
|
||||
const ov::Shape newShape({4, 0, 2});
|
||||
ASSERT_NO_THROW(t = std::make_shared<ov::intel_cpu::Tensor>(create_memory(Precision::FP32, {4, 3, 2})));
|
||||
|
||||
const void* orig_data = t->data();
|
||||
ASSERT_NO_THROW(t->set_shape({4, 0, 2}));
|
||||
ASSERT_EQ(newShape, t->get_shape());
|
||||
ASSERT_EQ(ov::Strides({0, 0, 0}), t->get_strides());
|
||||
ASSERT_EQ(orig_data, t->data());
|
||||
}
|
||||
|
||||
TEST_F(CPUTensorExtTest, canSyncMemoryAndTensor) {
|
||||
Shape orig_shape{4, 3, 2};
|
||||
|
||||
auto memptr = create_memory(Precision::FP32, orig_shape);
|
||||
std::shared_ptr<ov::ITensor> t = std::make_shared<ov::intel_cpu::Tensor>(memptr);
|
||||
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
|
||||
ASSERT_EQ(byteStrides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
|
||||
|
||||
// reallocate memory out boundary of tensor instance
|
||||
{
|
||||
Shape new_shape{1, 5, 2};
|
||||
|
||||
auto desc2 = memptr->getDescPtr()->cloneWithNewDims(new_shape.getStaticDims(), true);
|
||||
memptr->redefineDesc(desc2);
|
||||
ASSERT_EQ(memptr->getDescPtr()->getShape().toPartialShape().to_shape(), t->get_shape());
|
||||
ASSERT_EQ(byteStrides(memptr->getDescWithType<BlockedMemoryDesc>()->getStrides(), t->get_element_type()), t->get_strides());
|
||||
}
|
||||
}
|
||||
@@ -379,6 +379,10 @@ def compare_dump_file(ieb_file1, ieb_file2, visualize):
|
||||
else:
|
||||
diff_abs = np.abs(ieb1.value - ieb2.value)
|
||||
|
||||
if not np.all(diff_abs.shape):
|
||||
print(" Shape{} has dim 0".format(ieb1.shape))
|
||||
return
|
||||
|
||||
max_abs = np.amax(diff_abs)
|
||||
max_idx = np.where(diff_abs >= max_abs)
|
||||
max_org = np.abs(ieb2.value)[max_idx]
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "base/behavior_test_utils.hpp"
|
||||
#include "openvino/core/attribute_visitor.hpp"
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
#include "openvino/core/rank.hpp"
|
||||
#include "openvino/core/shape.hpp"
|
||||
#include "openvino/core/type/element_type.hpp"
|
||||
#include "openvino/core/type/element_type_traits.hpp"
|
||||
#include "openvino/op/parameter.hpp"
|
||||
#include "openvino/runtime/infer_request.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace test {
|
||||
namespace behavior {
|
||||
|
||||
struct OVIterationChaining : public OVInferRequestTests {
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<InferRequestParams>& obj);
|
||||
void Run();
|
||||
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
|
||||
ov::InferRequest req;
|
||||
|
||||
private:
|
||||
static std::shared_ptr<ov::Model> getIterativeFunction();
|
||||
bool checkOutput(const ov::runtime::Tensor& in, const ov::runtime::Tensor& actual);
|
||||
};
|
||||
|
||||
} // namespace behavior
|
||||
} // namespace test
|
||||
} // namespace ov
|
||||
@@ -188,6 +188,36 @@ TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkSetOutputShapeBeforeInfer)
|
||||
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
|
||||
}
|
||||
|
||||
TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkGetOutputThenSetOutputTensorPreAllocatedMemoryBeforeInfer) {
|
||||
const std::string tensor_name = "input_tensor";
|
||||
const ov::Shape refShape = inOutShapes[0].first;
|
||||
const ov::Shape refOutShape = inOutShapes[0].second;
|
||||
std::map<std::string, ov::PartialShape> shapes;
|
||||
shapes[tensor_name] = {ov::Dimension::dynamic(), 4, 20, 20};
|
||||
OV_ASSERT_NO_THROW(function->reshape(shapes));
|
||||
// Load ov::Model to target plugins
|
||||
auto execNet = ie->compile_model(function, target_device, configuration);
|
||||
// Create InferRequest
|
||||
ov::InferRequest req;
|
||||
ov::runtime::Tensor tensor;
|
||||
const std::string outputname = function->outputs().back().get_any_name();
|
||||
OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
|
||||
tensor = ov::test::utils::create_and_fill_tensor(element::f32, refShape, 100, -50);
|
||||
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor", tensor));
|
||||
// first, get ouput tensor
|
||||
OV_ASSERT_NO_THROW(req.infer());
|
||||
ASSERT_EQ(req.get_tensor(outputname).get_shape(), refOutShape);
|
||||
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
|
||||
// then, set output tensor
|
||||
float ptr[5000];
|
||||
ov::runtime::Tensor otensor(element::f32, refOutShape, ptr);
|
||||
OV_ASSERT_NO_THROW(req.set_tensor(outputname, otensor));
|
||||
OV_ASSERT_NO_THROW(req.infer());
|
||||
ASSERT_EQ(req.get_tensor(outputname).data<float>(), ptr);
|
||||
ASSERT_EQ(req.get_tensor(outputname).get_shape(), refOutShape);
|
||||
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor"), req.get_tensor(outputname)));
|
||||
}
|
||||
|
||||
TEST_P(OVInferRequestDynamicTests, InferDynamicNetworkWithoutSetShape) {
|
||||
const std::string tensor_name = "input_tensor";
|
||||
std::map<std::string, ov::PartialShape> shapes;
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <chrono>
|
||||
#include <gtest/gtest.h>
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "base/ov_behavior_test_utils.hpp"
|
||||
#include "openvino/core/attribute_visitor.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
#include "openvino/core/rank.hpp"
|
||||
#include "openvino/core/shape.hpp"
|
||||
#include "openvino/core/type/element_type.hpp"
|
||||
#include "openvino/core/type/element_type_traits.hpp"
|
||||
#include "openvino/op/parameter.hpp"
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "openvino/runtime/infer_request.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "behavior/ov_infer_request/iteration_chaining.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace test {
|
||||
namespace behavior {
|
||||
std::string OVIterationChaining::getTestCaseName(const testing::TestParamInfo<InferRequestParams>& obj) {
|
||||
return OVInferRequestTests::getTestCaseName(obj);
|
||||
}
|
||||
|
||||
std::shared_ptr<ov::Model> OVIterationChaining::getIterativeFunction() {
|
||||
const ov::PartialShape pshape{-1, 16};
|
||||
auto params = ngraph::builder::makeDynamicParams(element::Type_t::f32, {pshape});
|
||||
params[0]->get_output_tensor(0).set_names({"input_tensor_0"});
|
||||
params[0]->set_friendly_name("param_0");
|
||||
auto concat_const = ngraph::builder::makeConstant(element::Type_t::f32, {1, 16}, std::vector<float>{}, true);
|
||||
auto concat = ngraph::builder::makeConcat({params[0], concat_const}, 0 /*axis*/);
|
||||
auto eltwise_const = ngraph::builder::makeConstant(element::Type_t::f32, {1, 16}, std::vector<float>{}, true);
|
||||
auto eltwise = ngraph::builder::makeEltwise(concat, eltwise_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
concat->get_output_tensor(0).set_names({"result_tensor_0"});
|
||||
concat->set_friendly_name("result_0");
|
||||
eltwise->get_output_tensor(0).set_names({"result_tensor_1"});
|
||||
eltwise->set_friendly_name("result_1");
|
||||
|
||||
return std::make_shared<ov::Model>(ov::NodeVector{concat, eltwise}, ov::ParameterVector(params));
|
||||
}
|
||||
|
||||
void OVIterationChaining::SetUp() {
|
||||
std::tie(target_device, configuration) = this->GetParam();
|
||||
// Skip test according to plugin specific disabledTestPatterns() (if any)
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
APIBaseTest::SetUp();
|
||||
function = getIterativeFunction();
|
||||
ov::AnyMap params;
|
||||
for (auto&& v : configuration) {
|
||||
params.emplace(v.first, v.second);
|
||||
}
|
||||
execNet = core->compile_model(function, target_device, params);
|
||||
|
||||
try {
|
||||
req = execNet.create_infer_request();
|
||||
} catch (const std::exception& ex) {
|
||||
FAIL() << "Can't Create Infer Requiest in SetUp \nException [" << ex.what() << "]"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void OVIterationChaining::TearDown() {
|
||||
req = {};
|
||||
OVInferRequestTests::TearDown();
|
||||
}
|
||||
|
||||
bool OVIterationChaining::checkOutput(const ov::runtime::Tensor& in, const ov::runtime::Tensor& actual) {
|
||||
bool result = true;
|
||||
auto net = core->compile_model(function, CommonTestUtils::DEVICE_TEMPLATE);
|
||||
ov::InferRequest req;
|
||||
req = net.create_infer_request();
|
||||
auto tensor = req.get_tensor(function->inputs().back().get_any_name());
|
||||
tensor.set_shape(in.get_shape());
|
||||
for (int i = 0; i < in.get_size(); i++) {
|
||||
tensor.data<float>()[i] = in.data<float>()[i];
|
||||
}
|
||||
req.infer();
|
||||
for (int i = 0; i < actual.get_size(); i++) {
|
||||
if (fabs(req.get_output_tensor(0).data<float>()[i] - actual.data<float>()[i]) > std::numeric_limits<float>::epsilon())
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void OVIterationChaining::Run() {
|
||||
// perform iteration chaining by iteratively
|
||||
// setting input tensor to be output tensor of last inference, and
|
||||
// beginnign with an empty tensor
|
||||
ov::Tensor t0(element::Type_t::f32, {0, 16});
|
||||
|
||||
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor_0", t0));
|
||||
for (size_t i = 0; i < 10; i++) {
|
||||
OV_ASSERT_NO_THROW(req.infer());
|
||||
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor_0"), req.get_tensor("result_tensor_0")));
|
||||
|
||||
const auto t1 = req.get_tensor("result_tensor_0");
|
||||
OV_ASSERT_NO_THROW(req.set_tensor("input_tensor_0", t1));
|
||||
}
|
||||
ASSERT_TRUE(checkOutput(req.get_tensor("input_tensor_0"), req.get_tensor("result_tensor_0")));
|
||||
}
|
||||
|
||||
TEST_P(OVIterationChaining, Simple) {
|
||||
// Skip test according to plugin specific disabledTestPatterns() (if any)
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
Run();
|
||||
}
|
||||
|
||||
} // namespace behavior
|
||||
} // namespace test
|
||||
} // namespace ov
|
||||
Reference in New Issue
Block a user