Added classes for Sync and Async infer requests (#15387)

* Added classes for Sync and Async infer requests

* Changed hierarchy of Infer requests

* Fixed code style

* Fixed some tests

* Fixed naming style

* Fixed template plugin build

* Temporary disable python tests

* Revert "Temporary disable python tests"

This reverts commit c9aa9d79f8.

* Fixed template plugin tests

* Disable python tests

* Disable more steps

* Merged CI

* Revert "Merged CI"

This reverts commit 2f69574870.

* Try to fix segfault in python tests

* Remove default constructor

* Fixed documentation

* Fixed CPU tests

* Fixed Windows build

* Fixed comments

* Fixed build
Ilya Churaev 2023-02-14 06:59:53 +04:00 committed by GitHub
parent 609dee0abc
commit b80d05e0e1
22 changed files with 1769 additions and 380 deletions

View File

@ -19,6 +19,7 @@
namespace InferenceEngine {
class Blob;
class IAsyncInferRequestWrapper;
} // namespace InferenceEngine
namespace ov {
@ -28,6 +29,7 @@ class CoreImpl;
class InferRequest;
class RemoteContext;
class VariableState;
class IInferRequestInternalWrapper;
/**
* @brief Tensor API holding host memory
@ -52,6 +54,8 @@ protected:
friend class ov::InferRequest;
friend class ov::RemoteContext;
friend class ov::VariableState;
friend class ov::IInferRequestInternalWrapper;
friend class InferenceEngine::IAsyncInferRequestWrapper;
public:
/// @brief Default constructor

View File

@ -0,0 +1,276 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime AsyncInferRequest interface
* @file openvino/runtime/iasync_infer_request.hpp
*/
#pragma once
#include <future>
#include <memory>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace ov {
/**
* @brief Base class with the default implementation of an asynchronous multi-staged inference request.
* To customize pipeline stages, a derived class should change the content
* of the IAsyncInferRequest::m_pipeline member container.
* It consists of pairs of an executor and the task to be run by that executor.
* The class is recommended to be used by plugins as a base class for asynchronous inference request
* implementations.
* @note To synchronize the derived context with the stages,
* a derived class should call IAsyncInferRequest::stop_and_wait() in its destructor.
* @par Example
* Here is an example of an asynchronous inference request implementation for an accelerator device.
* It uses 5 different executors to run different stages of the synchronous inference request.
*/
class OPENVINO_RUNTIME_API IAsyncInferRequest : public IInferRequest {
public:
IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
const InferenceEngine::ITaskExecutor::Ptr& callback_executor);
~IAsyncInferRequest();
/**
* @brief Starts inference of the specified input(s) in asynchronous mode
* @note The method returns immediately; inference also starts immediately.
*/
virtual void start_async();
/**
* @brief Waits for the result to become available.
*/
virtual void wait();
/**
* @brief Waits for the result to become available. Blocks until the specified timeout has elapsed or the result
* becomes available, whichever comes first.
* @param timeout Maximum duration, in milliseconds, to block for
* @return True if the result became available, false if the timeout elapsed
*/
virtual bool wait_for(const std::chrono::milliseconds& timeout);
/**
* @brief Cancel current inference request execution
*/
virtual void cancel();
/**
* @brief Sets a callback function that is called on success or failure of the asynchronous request
* @param callback Function to be called; it receives a std::exception_ptr which is null on success
*/
virtual void set_callback(std::function<void(std::exception_ptr)> callback);
/**
* @brief Infers the specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
void infer() override;
/**
* @brief Queries performance measures per layer to identify the most time consuming operation.
* @note Not all plugins provide meaningful data.
* @return Vector of profiling information for operations in a model.
*/
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
/**
* @brief Gets state control interface for the given infer request.
*
* State control is essential for recurrent models.
* @return Vector of Variable State objects.
*/
std::vector<ov::VariableState> query_state() const override;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
protected:
using Stage = std::pair<InferenceEngine::ITaskExecutor::Ptr, InferenceEngine::Task>;
/**
* @brief Pipeline is vector of stages
*/
using Pipeline = std::vector<Stage>;
/**
* @brief Forbids starting new pipelines and waits for all pipelines that have already started.
* @note Should be called in the derived class destructor to wait until the derived context captured by
* pipeline tasks is no longer in use
*/
void stop_and_wait();
/**
* @brief Throws an exception if the inference request is busy or cancelled
*/
void check_state() const;
/**
* @brief Performs inference of the pipeline in synchronous mode
* @note Used by infer(), which ensures thread safety and then calls this method
*/
virtual void infer_thread_unsafe();
/**
* @brief Starts the asynchronous pipeline (not thread-safe).
* @note Used by start_async(), which ensures thread safety and then calls this method
*/
virtual void start_async_thread_unsafe();
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
void check_tensors() const override;
Pipeline m_pipeline;       //!< Pipeline variable that should be filled by the derived class.
Pipeline m_sync_pipeline;  //!< Synchronous pipeline variable that should be filled by the derived class.
private:
enum InferState { IDLE, BUSY, CANCELLED, STOP };
using Futures = std::vector<std::shared_future<void>>;
enum Stage_e : std::uint8_t { EXECUTOR, TASK };
InferState m_state = InferState::IDLE;
Futures m_futures;
std::promise<void> m_promise;
friend struct DisableCallbackGuard;
struct DisableCallbackGuard {
explicit DisableCallbackGuard(IAsyncInferRequest* this_) : _this{this_} {
std::lock_guard<std::mutex> lock{_this->m_mutex};
std::swap(m_callback, _this->m_callback);
}
~DisableCallbackGuard() {
std::lock_guard<std::mutex> lock{_this->m_mutex};
_this->m_callback = m_callback;
}
IAsyncInferRequest* _this = nullptr;
std::function<void(std::exception_ptr)> m_callback;
};
void run_first_stage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor = {});
InferenceEngine::Task make_next_stage_task(const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor);
template <typename F>
void infer_impl(const F& f) {
check_tensors();
InferState state = InferState::IDLE;
{
std::lock_guard<std::mutex> lock{m_mutex};
state = m_state;
switch (m_state) {
case InferState::BUSY:
throw ov::Busy("Infer Request is busy");
case InferState::CANCELLED:
throw ov::Cancelled("Infer Request was canceled");
case InferState::IDLE: {
m_futures.erase(std::remove_if(std::begin(m_futures),
std::end(m_futures),
[](const std::shared_future<void>& future) {
if (future.valid()) {
return (std::future_status::ready ==
future.wait_for(std::chrono::milliseconds{0}));
} else {
return true;
}
}),
m_futures.end());
m_promise = {};
m_futures.emplace_back(m_promise.get_future().share());
} break;
case InferState::STOP:
break;
}
m_state = InferState::BUSY;
}
if (state != InferState::STOP) {
try {
f();
} catch (...) {
m_promise.set_exception(std::current_exception());
std::lock_guard<std::mutex> lock{m_mutex};
m_state = InferState::IDLE;
throw;
}
}
}
std::shared_ptr<IInferRequest> m_sync_request;
InferenceEngine::ITaskExecutor::Ptr m_request_executor; //!< Used to run inference CPU tasks.
InferenceEngine::ITaskExecutor::Ptr
m_callback_executor;  //!< Used to run the post-inference callback in the asynchronous pipeline
InferenceEngine::ITaskExecutor::Ptr
m_sync_callback_executor;  //!< Used to run the post-inference callback in the synchronous pipeline
mutable std::mutex m_mutex;
std::function<void(std::exception_ptr)> m_callback;
};
} // namespace ov
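
The class comment above refers to an example for an accelerator device that is not shown in this hunk. Below is a minimal, hypothetical sketch of such a derived request: the class name and the submit_to_device()/wait_for_device() helpers are illustrative, while m_pipeline, stop_and_wait() and the constructor signature come from the header above.

// Hypothetical sketch, not part of this change: MyPluginAsyncInferRequest,
// submit_to_device() and wait_for_device() are illustrative names.
#include <memory>

#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"

class MyPluginAsyncInferRequest : public ov::IAsyncInferRequest {
public:
    MyPluginAsyncInferRequest(const std::shared_ptr<ov::ISyncInferRequest>& request,
                              const InferenceEngine::ITaskExecutor::Ptr& task_executor,
                              const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
        : ov::IAsyncInferRequest(request, task_executor, callback_executor) {
        // Replace the default single-stage pipeline with device-specific stages.
        // Each stage is an {executor, task} pair; the stages run one after another.
        m_pipeline = {
            {task_executor, [this] { submit_to_device(); }},
            {task_executor, [this] { wait_for_device(); }},  // a real plugin would usually use a dedicated wait executor
        };
    }

    ~MyPluginAsyncInferRequest() {
        // Base-class contract: wait for all started pipelines before the derived
        // context captured by the tasks above is destroyed.
        stop_and_wait();
    }

private:
    void submit_to_device() { /* enqueue inputs on the accelerator (illustrative) */ }
    void wait_for_device() { /* wait for completion and fetch outputs (illustrative) */ }
};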

View File

@ -13,15 +13,14 @@
#include <ostream>
#include <vector>
#include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace InferenceEngine {
class IInferRequestInternal;
class ICompiledModelWrapper;
} // namespace InferenceEngine
@ -30,6 +29,7 @@ namespace ov {
class CoreImpl;
class IPlugin;
class IExecutableNetworkWrapper;
class IAsyncInferRequest;
/**
* @brief OpenVINO ICompiledModel interface
@ -73,9 +73,9 @@ public:
/**
* @brief Create infer request
*
* @return Infer request interface
* @return Asynchronous infer request interface
*/
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const;
virtual std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const;
/**
* @brief Export compiled model to stream
@ -141,7 +141,7 @@ protected:
*
* @return Sync infer request
*/
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const = 0;
virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const = 0;
/**
* @brief Default implementation of create async inter request method
@ -149,11 +149,11 @@ protected:
* @tparam AsyncInferRequestType Async infer request type. InferenceEngine::AsyncInferRequestThreadSafeDefault by
* default
*
* @return Async infer request
* @return Asynchronous infer request
*/
template <typename AsyncInferRequestType = InferenceEngine::AsyncInferRequestThreadSafeDefault>
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_async_infer_request() const {
std::shared_ptr<InferenceEngine::IInferRequestInternal> syncRequestImpl = this->create_sync_infer_request();
template <typename AsyncInferRequestType = ov::IAsyncInferRequest>
std::shared_ptr<ov::IAsyncInferRequest> create_async_infer_request() const {
auto syncRequestImpl = create_sync_infer_request();
return std::make_shared<AsyncInferRequestType>(syncRequestImpl, m_task_executor, m_callback_executor);
}
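
The default create_infer_request() simply calls create_async_infer_request(), so a plugin usually only overrides the two hooks above. A hypothetical sketch of that wiring is given below; MyPluginCompiledModel and the included plugin header are illustrative, and MyPluginSyncInferRequest / MyPluginAsyncInferRequest refer to the request sketches shown next to the new request headers in this change.

// Hypothetical sketch, not part of this change; all MyPlugin* names are illustrative.
#include <memory>

#include "my_plugin_infer_requests.hpp"  // hypothetical plugin header defining the two request classes
#include "openvino/runtime/icompiled_model.hpp"

class MyPluginCompiledModel : public ov::ICompiledModel {
public:
    using ov::ICompiledModel::ICompiledModel;  // reuse the base-class constructors

    // Optional: choose a plugin-specific asynchronous wrapper. Without this override,
    // create_infer_request() wraps the synchronous request into ov::IAsyncInferRequest
    // through the create_async_infer_request() helper shown above.
    std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override {
        return create_async_infer_request<MyPluginAsyncInferRequest>();
    }

protected:
    // Mandatory hook: supply the synchronous request that does the real work.
    // The definition (not shown) constructs a MyPluginSyncInferRequest bound to this model.
    std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;

    // Other ICompiledModel methods a real plugin implements (export_model, properties,
    // runtime model) are omitted for brevity.
};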

View File

@ -12,6 +12,7 @@
#include <memory>
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/tensor.hpp"
#include "so_ptr.hpp"

View File

@ -0,0 +1,121 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime InferRequest interface
* @file openvino/runtime/iinfer_request.hpp
*/
#pragma once
#include <exception>
#include <memory>
#include <unordered_map>
#include <vector>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
namespace ov {
class IAsyncInferRequest;
class ICompiledModel;
class OPENVINO_RUNTIME_API IInferRequest {
public:
virtual ~IInferRequest();
/**
* @brief Infers the specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
virtual void infer() = 0;
/**
* @brief Queries performance measures per layer to identify the most time consuming operation.
* @note Not all plugins provide meaningful data.
* @return Vector of profiling information for operations in a model.
*/
virtual std::vector<ov::ProfilingInfo> get_profiling_info() const = 0;
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
virtual ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const = 0;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
virtual void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) = 0;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
virtual std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const = 0;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
virtual void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) = 0;
/**
* @brief Gets state control interface for the given infer request.
*
* State control is essential for recurrent models.
* @return Vector of Variable State objects.
*/
virtual std::vector<ov::VariableState> query_state() const = 0;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
virtual const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const = 0;
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
virtual const std::vector<ov::Output<const ov::Node>>& get_inputs() const = 0;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
virtual const std::vector<ov::Output<const ov::Node>>& get_outputs() const = 0;
protected:
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
virtual void check_tensors() const = 0;
friend IAsyncInferRequest;
};
}; // namespace ov

View File

@ -0,0 +1,153 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime SyncInferRequest interface
* @file openvino/runtime/isync_infer_request.hpp
*/
#pragma once
#include <exception>
#include <memory>
#include <openvino/runtime/tensor.hpp>
#include <unordered_map>
#include <vector>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/profiling_info.hpp"
namespace ov {
/**
* @brief Interface for a synchronous infer request
*/
class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest {
public:
/**
* @brief Constructs a synchronous inference request
*
* @param compiled_model Pointer to the compiled model
*/
ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model);
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
/**
* @brief Plugin implementation for set tensors
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
virtual void set_tensors_impl(const ov::Output<const ov::Node> port, const std::vector<ov::Tensor>& tensors);
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
protected:
struct FoundPort {
size_t idx;
enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;
bool found() {
return type != Type::NOT_FOUND;
}
bool is_input() {
return type == Type::INPUT;
}
bool is_output() {
return !is_input();
}
};
/**
* @brief Finds input or output port
* @return Structure that contains the index of the input/output port or reports that the port was not found
*/
FoundPort find_port(const ov::Output<const ov::Node>& port) const;
/**
* @brief Converts batched tensors to a single tensor
*/
void convert_batched_tensors();
/**
* @brief Basic checks for input/output tensor
*
* @param port Input/Output port
* @param tensor Input/Output tensor
*/
void check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const;
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
void check_tensors() const override;
std::vector<ov::Tensor> m_input_tensors;
std::vector<ov::Tensor> m_output_tensors;
std::unordered_map<size_t, std::vector<ov::Tensor>> m_batched_tensors;
private:
std::shared_ptr<ov::ICompiledModel> m_compiled_model;
};
}; // namespace ov
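
For reference, a minimal hypothetical synchronous request built on this interface might look as follows; MyPluginSyncInferRequest and run_on_device() are illustrative names, while check_tensors(), convert_batched_tensors() and the protected tensor members come from the header above.

// Hypothetical sketch, not part of this change; MyPluginSyncInferRequest and
// run_on_device() are illustrative names.
#include <memory>
#include <vector>

#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/variable_state.hpp"

class MyPluginSyncInferRequest : public ov::ISyncInferRequest {
public:
    explicit MyPluginSyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
        : ov::ISyncInferRequest(compiled_model) {}

    void infer() override {
        // Base-class helpers: validate the tensors and merge tensors that were set
        // per batch item via set_tensors() into single input tensors.
        check_tensors();
        convert_batched_tensors();
        run_on_device();  // reads m_input_tensors, writes m_output_tensors (illustrative)
    }

    std::vector<ov::ProfilingInfo> get_profiling_info() const override {
        return {};  // this sketch does not collect per-layer statistics
    }

    std::vector<ov::VariableState> query_state() const override {
        return {};  // no stateful operations in this sketch
    }

private:
    void run_on_device() { /* device-specific execution, illustrative placeholder */ }
};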

View File

@ -19,20 +19,17 @@
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/variable_state.hpp"
namespace InferenceEngine {
class IInferRequestInternal;
} // namespace InferenceEngine
namespace ov {
class CompiledModel;
class IAsyncInferRequest;
/**
* @brief This is a class of infer request that can be run in asynchronous or synchronous manners.
* @ingroup ov_runtime_cpp_api
*/
class OPENVINO_RUNTIME_API InferRequest {
std::shared_ptr<InferenceEngine::IInferRequestInternal> _impl;
std::shared_ptr<ov::IAsyncInferRequest> _impl;
std::shared_ptr<void> _so;
/**
@ -41,7 +38,7 @@ class OPENVINO_RUNTIME_API InferRequest {
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if a plugin object
* is destroyed.
*/
InferRequest(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& impl, const std::shared_ptr<void>& so);
InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so);
friend class ov::CompiledModel;
public:

View File

@ -31,6 +31,7 @@ class Core;
class CoreImpl;
class Plugin;
class IPlugin;
class ISyncInferRequest;
class IInferencePluginWrapper;
class IExecutableNetworkWrapper;
class CompiledModel;
@ -62,6 +63,7 @@ protected:
friend class ov::CoreImpl;
friend class ov::Plugin;
friend class ov::IPlugin;
friend class ov::ISyncInferRequest;
friend class ov::IInferencePluginWrapper;
friend class ov::IExecutableNetworkWrapper;
friend class ov::CompiledModel;

View File

@ -17,11 +17,13 @@
namespace InferenceEngine {
class IVariableStateInternal;
class IAsyncInferRequestWrapper;
} // namespace InferenceEngine
namespace ov {
class InferRequest;
class IInferRequestInternalWrapper;
/**
* @brief VariableState class
@ -41,6 +43,8 @@ class OPENVINO_RUNTIME_API VariableState {
const std::vector<std::shared_ptr<void>>& so);
friend class ov::InferRequest;
friend class ov::IInferRequestInternalWrapper;
friend class InferenceEngine::IAsyncInferRequestWrapper;
public:
/**

View File

@ -19,24 +19,6 @@
#include "openvino/runtime/infer_request.hpp"
#include "transformations/utils/utils.hpp"
namespace {
inline bool getPort(ov::Output<const ov::Node>& port,
const std::string& name,
const std::vector<std::vector<std::shared_ptr<const ov::Node>>>& ports) {
for (const auto& nodes : ports) {
for (const auto& node : nodes) {
const auto& names = node->get_output_tensor(0).get_names();
if (names.find(name) != names.end()) {
port = node->output(0);
return true;
}
}
}
return false;
}
} // namespace
namespace InferenceEngine {
#define INFER_REQ_CALL_STATEMENT(...) \
@ -48,18 +30,6 @@ namespace InferenceEngine {
::InferenceEngine::details::Rethrow(); \
}
#define OV_INFER_REQ_CALL_STATEMENT(...) \
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
try { \
__VA_ARGS__; \
} catch (const ::InferenceEngine::RequestBusy& ex) { \
throw ov::Busy(ex.what()); \
} catch (const std::exception& ex) { \
throw ov::Exception(ex.what()); \
} catch (...) { \
OPENVINO_ASSERT(false, "Unexpected exception"); \
}
InferRequest::~InferRequest() {
_impl = {};
}
@ -237,301 +207,3 @@ bool InferRequest::operator==(const InferRequest& r) const noexcept {
}
} // namespace InferenceEngine
namespace {
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
p = node->input_value(0);
}
return ov::op::util::create_ie_output_name(p);
}
} // namespace
namespace ov {
InferRequest::~InferRequest() {
_impl = {};
}
InferRequest::InferRequest(const ie::IInferRequestInternal::Ptr& impl, const std::shared_ptr<void>& so)
: _impl{impl},
_so{so} {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
}
void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlob(get_legacy_name_from_port(port), tensor._impl); });
}
void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
}
void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
"Port for tensor name " + name + " was not found.");
set_tensor(port, tensor);
});
}
void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs()}),
"set_tensors error. Input port for tensor name ",
name,
" was not found.");
set_tensors(port, tensors);
})
}
void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
auto impls = std::vector<InferenceEngine::Blob::Ptr>();
std::transform(tensors.begin(), tensors.end(), std::back_inserter(impls), [](const Tensor& item) {
return item._impl;
});
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlobs(get_legacy_name_from_port(port), impls); })
}
void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->GetInputs();
OPENVINO_ASSERT(inputs.size() > idx,
"Input port for index ",
idx,
" was not found! The model has only ",
inputs.size(),
" inputs.");
set_tensor(inputs.at(idx)->output(0), tensor);
});
}
void InferRequest::set_input_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->GetInputs();
OPENVINO_ASSERT(inputs.size() == 1,
"set_input_tensor() must be called on a function with exactly one parameter.");
set_tensor(inputs.at(0)->output(0), tensor);
});
}
void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(idx < _impl->GetInputs().size(),
"set_input_tensors error. Input port for index ",
idx,
" is out of bounds. Model has only ",
_impl->GetInputs().size(),
" inputs");
set_tensors(_impl->GetInputs().at(idx)->output(0), tensors);
})
}
void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->GetInputs().size() == 1,
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
_impl->GetInputs().size(),
" inputs");
set_tensors(_impl->GetInputs().at(0)->output(0), tensors);
})
}
void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->GetOutputs();
OPENVINO_ASSERT(outputs.size() > idx,
"Output port for index ",
idx,
" was not found! The model has only ",
outputs.size(),
" outputs.");
set_tensor(outputs.at(idx)->output(0), tensor);
});
}
void InferRequest::set_output_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->GetOutputs();
OPENVINO_ASSERT(outputs.size() == 1,
"set_output_tensor() must be called on a function with exactly one parameter.");
set_tensor(outputs.at(0)->output(0), tensor);
});
}
Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
const auto& name = get_legacy_name_from_port(port);
OPENVINO_ASSERT(!_impl->GetBlobs(name),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for name '",
name,
"'");
auto blob = _impl->GetBlob(name);
soVec = {_so, _impl->getPointerToSo()};
Tensor tensor = {blob, soVec};
return tensor;
});
}
Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
}
Tensor InferRequest::get_tensor(const std::string& name) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
"Port for tensor name " + name + " was not found.");
return get_tensor(port);
});
}
Tensor InferRequest::get_input_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetInputs().at(idx)->output(0)); });
}
Tensor InferRequest::get_output_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetOutputs().at(idx)->output(0)); });
}
Tensor InferRequest::get_input_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->GetInputs();
if (inputs.size() != 1) {
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(inputs.at(0)->output(0));
});
}
Tensor InferRequest::get_output_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->GetOutputs();
if (outputs.size() != 1) {
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(outputs.at(0)->output(0));
});
}
void InferRequest::infer() {
OV_INFER_REQ_CALL_STATEMENT(_impl->Infer();)
}
void InferRequest::cancel() {
OV_INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
}
std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
OV_INFER_REQ_CALL_STATEMENT({
auto ieInfos = _impl->GetPerformanceCounts();
std::vector<ProfilingInfo> infos;
infos.reserve(ieInfos.size());
while (!ieInfos.empty()) {
auto itIeInfo = std::min_element(
std::begin(ieInfos),
std::end(ieInfos),
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
return lhs.second.execution_index < rhs.second.execution_index;
});
IE_ASSERT(itIeInfo != ieInfos.end());
auto& ieInfo = itIeInfo->second;
infos.push_back(ProfilingInfo{});
auto& info = infos.back();
switch (ieInfo.status) {
case ie::InferenceEngineProfileInfo::NOT_RUN:
info.status = ProfilingInfo::Status::NOT_RUN;
break;
case ie::InferenceEngineProfileInfo::OPTIMIZED_OUT:
info.status = ProfilingInfo::Status::OPTIMIZED_OUT;
break;
case ie::InferenceEngineProfileInfo::EXECUTED:
info.status = ProfilingInfo::Status::EXECUTED;
break;
}
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
info.node_name = itIeInfo->first;
info.exec_type = std::string{ieInfo.exec_type};
info.node_type = std::string{ieInfo.layer_type};
ieInfos.erase(itIeInfo);
}
return infos;
})
}
void InferRequest::start_async() {
OV_INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
}
void InferRequest::wait() {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
_impl->Wait(ie::InferRequest::RESULT_READY);
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
return _impl->Wait(timeout.count()) == ie::OK;
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
OV_INFER_REQ_CALL_STATEMENT(_impl->SetCallback(std::move(callback));)
}
std::vector<VariableState> InferRequest::query_state() {
std::vector<VariableState> variable_states;
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
soVec = {_so, _impl->getPointerToSo()};
for (auto&& state : _impl->QueryState()) {
variable_states.emplace_back(VariableState{state, soVec});
}
})
return variable_states;
}
CompiledModel InferRequest::get_compiled_model() {
OV_INFER_REQ_CALL_STATEMENT(
return {ov::legacy_convert::convert_compiled_model(_impl->getPointerToExecutableNetworkInternal()), _so});
}
bool InferRequest::operator!() const noexcept {
return !_impl;
}
InferRequest::operator bool() const noexcept {
return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
return !(r == *this);
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl;
}
} // namespace ov
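
For context, the public asynchronous API backed by these classes is unchanged for applications; a short usage sketch follows (model path and device name are placeholders, and the model is assumed to have a single input and a single output).

// Application-side sketch of the asynchronous ov::InferRequest API; "model.xml"
// and "CPU" are placeholders.
#include <exception>
#include <iostream>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto compiled_model = core.compile_model("model.xml", "CPU");
    ov::InferRequest request = compiled_model.create_infer_request();

    // Assumes a single-input model; fill the input tensor in place.
    ov::Tensor input = request.get_input_tensor();
    // ... write input data ...

    // The callback receives a null exception_ptr on success.
    request.set_callback([](std::exception_ptr ex) {
        if (ex)
            std::cerr << "Inference failed" << std::endl;
    });

    request.start_async();  // returns immediately
    request.wait();         // blocks until the result is ready

    ov::Tensor output = request.get_output_tensor();
    std::cout << "Output byte size: " << output.get_byte_size() << std::endl;
    return 0;
}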

View File

@ -4,36 +4,48 @@
#include "converter_utils.hpp"
#include <ie_blob.h>
#include <ie_common.h>
#include <ie_compound_blob.h>
#include <ie_layouts.h>
#include <fstream>
#include <ie_input_info.hpp>
#include <ie_plugin_config.hpp>
#include <ie_version.hpp>
#include <memory>
#include <openvino/core/except.hpp>
#include <openvino/op/parameter.hpp>
#include <openvino/runtime/exception.hpp>
#include <openvino/runtime/remote_context.hpp>
#include <openvino/runtime/tensor.hpp>
#include <mutex>
#include "any_copy.hpp"
#include "cnn_network_ngraph_impl.hpp"
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "icompiled_model_wrapper.hpp"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_compound_blob.h"
#include "ie_icore.hpp"
#include "ie_input_info.hpp"
#include "ie_layouts.h"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "ie_version.hpp"
#include "iplugin_wrapper.hpp"
#include "openvino/core/except.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "so_ptr.hpp"
#include "transformations/utils/utils.hpp"
namespace {
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
p = node->input_value(0);
}
return ov::op::util::create_ie_output_name(p);
}
void fill_input_info(ov::Output<ov::Node>& input, InferenceEngine::InputInfo::Ptr& input_info) {
const ov::Output<const ov::Node> const_input(input.get_node(), input.get_index());
ov::legacy_convert::fill_input_info(const_input, input_info);
@ -341,7 +353,9 @@ public:
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> CreateInferRequest() override {
return m_model->create_infer_request();
auto infer_request = legacy_convert::convert_infer_request(m_model->create_infer_request());
infer_request->setPointerToExecutableNetworkInternal(shared_from_this());
return infer_request;
}
void Export(std::ostream& model) override {
@ -397,3 +411,312 @@ std::shared_ptr<ov::ICompiledModel> ov::legacy_convert::convert_compiled_model(
}
return std::make_shared<InferenceEngine::ICompiledModelWrapper>(model);
}
namespace ov {
class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestInternal {
ov::Output<const ov::Node> find_port(const std::string& legacy_name) const {
for (const auto& port : m_request->get_inputs()) {
if (get_legacy_name_from_port(port) == legacy_name)
return port;
}
for (const auto& port : m_request->get_outputs()) {
if (get_legacy_name_from_port(port) == legacy_name)
return port;
}
OPENVINO_ASSERT(false, "Cannot find port with name: ", legacy_name);
}
public:
explicit IInferRequestInternalWrapper(const std::shared_ptr<ov::IAsyncInferRequest>& request)
: m_request(request) {}
void Infer() override {
m_request->infer();
}
void Cancel() override {
m_request->cancel();
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override {
auto res = m_request->get_profiling_info();
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> ret;
for (const auto& info : res) {
InferenceEngine::InferenceEngineProfileInfo old_info;
old_info.cpu_uSec = info.cpu_time.count();
old_info.realTime_uSec = info.real_time.count();
strncpy(old_info.exec_type, info.exec_type.c_str(), sizeof(old_info.exec_type));
old_info.exec_type[sizeof(old_info.exec_type) - 1] = 0;
strncpy(old_info.layer_type, info.node_type.c_str(), sizeof(old_info.layer_type));
old_info.layer_type[sizeof(old_info.layer_type) - 1] = 0;
switch (info.status) {
case ov::ProfilingInfo::Status::EXECUTED:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::EXECUTED;
break;
case ov::ProfilingInfo::Status::NOT_RUN:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT;
break;
}
ret[info.node_name] = old_info;
}
return ret;
}
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override {
m_request->set_tensor(find_port(name), ov::Tensor{data, {}});
}
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr>& blobs) override {
std::vector<ov::Tensor> tensors;
for (const auto& blob : blobs) {
tensors.emplace_back(ov::Tensor{blob, {}});
}
m_request->set_tensors(find_port(name), tensors);
}
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override {
return m_request->get_tensor(find_port(name))._impl;
}
InferenceEngine::BatchedBlob::Ptr GetBlobs(const std::string& name) override {
auto tensors = m_request->get_tensors(find_port(name));
std::vector<InferenceEngine::Blob::Ptr> blobs;
for (const auto& tensor : tensors) {
blobs.emplace_back(tensor._impl);
}
return std::make_shared<InferenceEngine::BatchedBlob>(blobs);
}
void SetBlob(const std::string& name,
const InferenceEngine::Blob::Ptr& data,
const InferenceEngine::PreProcessInfo& info) override {
OPENVINO_NOT_IMPLEMENTED;
}
const InferenceEngine::PreProcessInfo& GetPreProcess(const std::string& name) const override {
OPENVINO_NOT_IMPLEMENTED;
}
void SetBatch(int batch) override {
OPENVINO_NOT_IMPLEMENTED;
}
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override {
auto res = m_request->query_state();
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret;
for (const auto& state : res) {
ret.emplace_back(state._impl);
}
return ret;
}
void StartAsync() override {
m_request->start_async();
}
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override {
if (millis_timeout == InferenceEngine::IInferRequest::RESULT_READY) {
m_request->wait();
} else {
std::chrono::milliseconds timeout(millis_timeout);
bool res = m_request->wait_for(timeout);
if (!res)
return InferenceEngine::StatusCode::RESULT_NOT_READY;
}
return InferenceEngine::StatusCode::OK;
}
void SetCallback(std::function<void(std::exception_ptr)> callback) override {
m_request->set_callback(std::move(callback));
}
std::shared_ptr<ov::IAsyncInferRequest> get_infer_request() {
return m_request;
}
private:
std::shared_ptr<ov::IAsyncInferRequest> m_request;
};
} // namespace ov
namespace InferenceEngine {
class IAsyncInferRequestWrapper : public ov::IAsyncInferRequest {
public:
IAsyncInferRequestWrapper(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& request)
: ov::IAsyncInferRequest(nullptr, nullptr, nullptr),
m_request(request) {
if (m_request->getPointerToExecutableNetworkInternal())
m_compiled_model =
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> get_infer_request() {
return m_request;
}
void infer() override {
m_request->Infer();
}
void start_async() override {
m_request->StartAsync();
}
void wait() override {
try {
m_request->Wait(InferenceEngine::InferRequest::RESULT_READY);
} catch (const ov::Cancelled&) {
throw;
} catch (const InferenceEngine::InferCancelled& e) {
throw ov::Cancelled{e.what()};
} catch (const std::exception& ex) {
throw ov::Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool wait_for(const std::chrono::milliseconds& timeout) override {
try {
return m_request->Wait(timeout.count()) == InferenceEngine::OK;
} catch (const InferenceEngine::InferCancelled& e) {
throw ov::Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void cancel() override {
m_request->Cancel();
}
std::vector<ov::ProfilingInfo> get_profiling_info() const override {
auto ieInfos = m_request->GetPerformanceCounts();
std::vector<ov::ProfilingInfo> infos;
infos.reserve(ieInfos.size());
while (!ieInfos.empty()) {
auto itIeInfo = std::min_element(
std::begin(ieInfos),
std::end(ieInfos),
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
return lhs.second.execution_index < rhs.second.execution_index;
});
IE_ASSERT(itIeInfo != ieInfos.end());
auto& ieInfo = itIeInfo->second;
infos.push_back(ov::ProfilingInfo{});
auto& info = infos.back();
switch (ieInfo.status) {
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
info.status = ov::ProfilingInfo::Status::NOT_RUN;
break;
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
info.status = ov::ProfilingInfo::Status::OPTIMIZED_OUT;
break;
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
info.status = ov::ProfilingInfo::Status::EXECUTED;
break;
}
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
info.node_name = itIeInfo->first;
info.exec_type = std::string{ieInfo.exec_type};
info.node_type = std::string{ieInfo.layer_type};
ieInfos.erase(itIeInfo);
}
return infos;
}
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override {
const auto& name = get_legacy_name_from_port(port);
OPENVINO_ASSERT(!m_request->GetBlobs(name),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for name '",
name,
"'");
auto blob = m_request->GetBlob(name);
ov::Tensor tensor = {blob, {m_request->getPointerToSo()}};
return tensor;
}
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override {
m_request->SetBlob(get_legacy_name_from_port(port), tensor._impl);
}
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override {
auto blobs = m_request->GetBlobs(get_legacy_name_from_port(port));
std::vector<ov::Tensor> ret;
if (!blobs)
return ret;
for (size_t i = 0; i < blobs->size(); i++) {
ret.emplace_back(ov::Tensor{blobs->getBlob(i), {m_request->getPointerToSo()}});
}
return ret;
}
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override {
std::vector<InferenceEngine::Blob::Ptr> blobs;
for (const auto& tensor : tensors) {
blobs.emplace_back(tensor._impl);
}
m_request->SetBlobs(get_legacy_name_from_port(port), blobs);
}
std::vector<ov::VariableState> query_state() const override {
std::vector<ov::VariableState> variable_states;
std::vector<std::shared_ptr<void>> soVec;
soVec = {m_request->getPointerToSo()};
for (auto&& state : m_request->QueryState()) {
variable_states.emplace_back(ov::VariableState{state, soVec});
}
return variable_states;
}
void set_callback(std::function<void(std::exception_ptr)> callback) override {
m_request->SetCallback(std::move(callback));
}
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override {
if (!m_compiled_model) {
std::lock_guard<std::mutex> lock(m_mutex);
if (!m_compiled_model) {
if (m_request->getPointerToExecutableNetworkInternal())
m_compiled_model =
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
}
}
OPENVINO_ASSERT(m_compiled_model);
return m_compiled_model;
}
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override {
return get_compiled_model()->inputs();
}
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override {
return get_compiled_model()->outputs();
}
private:
std::shared_ptr<InferenceEngine::IInferRequestInternal> m_request;
mutable std::shared_ptr<ov::ICompiledModel> m_compiled_model;
mutable std::mutex m_mutex;
};
} // namespace InferenceEngine
std::shared_ptr<::InferenceEngine::IInferRequestInternal> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request) {
if (auto comp_model = std::dynamic_pointer_cast<InferenceEngine::IAsyncInferRequestWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<ov::IInferRequestInternalWrapper>(request);
}
std::shared_ptr<::ov::IAsyncInferRequest> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request) {
if (auto comp_model = std::dynamic_pointer_cast<ov::IInferRequestInternalWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<InferenceEngine::IAsyncInferRequestWrapper>(request);
}
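
Both helpers detect their own wrapper types and return the wrapped object, so requests do not accumulate wrapper layers when they cross the 1.0/2.0 boundary repeatedly. A small hedged illustration (the demonstrate_unwrap function is hypothetical):

// Hypothetical illustration of the unwrap behaviour of convert_infer_request.
#include <cassert>
#include <memory>

#include "dev/converter_utils.hpp"  // internal header path inside the OpenVINO source tree

void demonstrate_unwrap(const std::shared_ptr<ov::IAsyncInferRequest>& request) {
    // Wrap a 2.0 request into the legacy IInferRequestInternal interface...
    auto legacy = ov::legacy_convert::convert_infer_request(request);
    // ...and convert it back: the wrapper is recognized and the original object is returned.
    auto roundtrip = ov::legacy_convert::convert_infer_request(legacy);
    assert(roundtrip == request);
}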

View File

@ -5,8 +5,10 @@
#pragma once
#include "cpp/ie_cnn_network.h"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "openvino/core/model.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iplugin.hpp"
@ -22,11 +24,18 @@ std::shared_ptr<const ov::Model> convert_model(const InferenceEngine::CNNNetwork
std::shared_ptr<::InferenceEngine::IInferencePlugin> convert_plugin(const std::shared_ptr<::ov::IPlugin>& plugin);
std::shared_ptr<::ov::IPlugin> convert_plugin(const std::shared_ptr<::InferenceEngine::IInferencePlugin>& plugin);
std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
const std::shared_ptr<::ov::ICompiledModel>& model);
std::shared_ptr<::ov::ICompiledModel> convert_compiled_model(
const std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal>& model);
// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IInferRequestInternal> convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request);
OPENVINO_RUNTIME_API std::shared_ptr<::ov::IAsyncInferRequest> convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request);
} // namespace legacy_convert
} // namespace ov

View File

@ -0,0 +1,266 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/iasync_infer_request.hpp"
#include <memory>
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace {
struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor {
explicit ImmediateStreamsExecutor(const InferenceEngine::IStreamsExecutor::Ptr& streamsExecutor)
: _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {
_streamsExecutor->Execute(std::move(task));
}
InferenceEngine::IStreamsExecutor::Ptr _streamsExecutor;
};
} // namespace
ov::IAsyncInferRequest::~IAsyncInferRequest() {
stop_and_wait();
}
ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
: m_sync_request(request),
m_request_executor(task_executor),
m_callback_executor(callback_executor) {
if (m_request_executor && m_sync_request)
m_pipeline = {{m_request_executor, [this] {
m_sync_request->infer();
}}};
if (m_sync_request)
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
m_sync_request->infer();
}}};
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
if (streams_executor != nullptr) {
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
m_sync_request->infer();
}}};
}
}
void ov::IAsyncInferRequest::wait() {
// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();
if (!future.valid()) {
return;
}
future.wait();
}
bool ov::IAsyncInferRequest::wait_for(const std::chrono::milliseconds& timeout) {
OPENVINO_ASSERT(timeout >= std::chrono::milliseconds{0}, "Timeout can't be less than 0 for InferRequest::wait().");
auto status = std::future_status::deferred;
// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();
if (!future.valid()) {
return false;
}
status = future.wait_for(std::chrono::milliseconds{timeout});
if (std::future_status::ready == status) {
future.get();
return true;
} else {
return false;
}
}
void ov::IAsyncInferRequest::cancel() {
std::lock_guard<std::mutex> lock{m_mutex};
if (m_state == InferState::BUSY) {
m_state = InferState::CANCELLED;
}
}
void ov::IAsyncInferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
check_state();
m_callback = std::move(callback);
}
std::vector<ov::VariableState> ov::IAsyncInferRequest::query_state() const {
check_state();
return m_sync_request->query_state();
}
void ov::IAsyncInferRequest::infer_thread_unsafe() {
run_first_stage(m_sync_pipeline.begin(), m_sync_pipeline.end(), m_sync_callback_executor);
}
void ov::IAsyncInferRequest::start_async_thread_unsafe() {
run_first_stage(m_pipeline.begin(), m_pipeline.end(), m_callback_executor);
}
void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
auto& firstStageExecutor = std::get<Stage_e::EXECUTOR>(*itBeginStage);
OPENVINO_ASSERT(nullptr != firstStageExecutor);
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
}
InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
return std::bind(
[this, itStage, itEndStage](InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) mutable {
std::exception_ptr currentException = nullptr;
auto& thisStage = *itStage;
auto itNextStage = itStage + 1;
try {
auto& stageTask = std::get<Stage_e::TASK>(thisStage);
OPENVINO_ASSERT(nullptr != stageTask);
stageTask();
if (itEndStage != itNextStage) {
auto& nextStage = *itNextStage;
auto& nextStageExecutor = std::get<Stage_e::EXECUTOR>(nextStage);
OPENVINO_ASSERT(nullptr != nextStageExecutor);
nextStageExecutor->run(make_next_stage_task(itNextStage, itEndStage, std::move(callbackExecutor)));
}
} catch (...) {
currentException = std::current_exception();
}
if ((itEndStage == itNextStage) || (nullptr != currentException)) {
auto lastStageTask = [this, currentException]() mutable {
auto promise = std::move(m_promise);
std::function<void(std::exception_ptr)> callback;
{
std::lock_guard<std::mutex> lock{m_mutex};
m_state = InferState::IDLE;
std::swap(callback, m_callback);
}
if (callback) {
try {
callback(currentException);
} catch (...) {
currentException = std::current_exception();
}
std::lock_guard<std::mutex> lock{m_mutex};
if (!m_callback) {
std::swap(callback, m_callback);
}
}
if (nullptr == currentException) {
promise.set_value();
} else {
promise.set_exception(currentException);
}
};
if (nullptr == callbackExecutor) {
lastStageTask();
} else {
callbackExecutor->run(std::move(lastStageTask));
}
}
},
std::move(callbackExecutor));
}
void ov::IAsyncInferRequest::start_async() {
infer_impl([&] {
start_async_thread_unsafe();
});
}
void ov::IAsyncInferRequest::check_state() const {
std::lock_guard<std::mutex> lock{m_mutex};
switch (m_state) {
case InferState::BUSY:
throw ov::Busy("Infer Request is busy");
case InferState::CANCELLED:
throw ov::Cancelled("Infer Request was canceled");
default:
break;
}
}
std::vector<ov::ProfilingInfo> ov::IAsyncInferRequest::get_profiling_info() const {
check_state();
return m_sync_request->get_profiling_info();
}
ov::Tensor ov::IAsyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensor(port);
}
void ov::IAsyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
check_state();
return m_sync_request->set_tensor(port, tensor);
}
std::vector<ov::Tensor> ov::IAsyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensors(port);
}
void ov::IAsyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
check_state();
return m_sync_request->set_tensors(port, tensors);
}
void ov::IAsyncInferRequest::stop_and_wait() {
Futures futures;
InferState state = InferState::IDLE;
{
std::lock_guard<std::mutex> lock{m_mutex};
state = m_state;
if (state != InferState::STOP) {
m_callback = {};
m_state = InferState::STOP;
futures = std::move(m_futures);
}
}
if (state != InferState::STOP) {
for (auto&& future : futures) {
if (future.valid()) {
future.wait();
}
}
}
}
void ov::IAsyncInferRequest::infer() {
m_sync_request->infer();
}
void ov::IAsyncInferRequest::check_tensors() const {
m_sync_request->check_tensors();
}
const std::shared_ptr<ov::ICompiledModel>& ov::IAsyncInferRequest::get_compiled_model() const {
return m_sync_request->get_compiled_model();
}
const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_inputs() const {
return m_sync_request->get_inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_outputs() const {
return m_sync_request->get_outputs();
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -78,7 +78,8 @@ const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::outputs() con
const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::inputs() const {
return m_inputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> ov::ICompiledModel::create_infer_request() const {
std::shared_ptr<ov::IAsyncInferRequest> ov::ICompiledModel::create_infer_request() const {
return create_async_infer_request();
}

View File

@ -6,6 +6,8 @@
#include <ie_plugin_config.hpp>
#include "dev/converter_utils.hpp"
InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model)
: ov::ICompiledModel(nullptr, ov::legacy_convert::convert_plugin(model->_plugin)),
@ -20,9 +22,9 @@ InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
m_inputs = inputs;
m_outputs = outputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> InferenceEngine::ICompiledModelWrapper::create_infer_request()
const {
return m_model->CreateInferRequest();
std::shared_ptr<ov::IAsyncInferRequest> InferenceEngine::ICompiledModelWrapper::create_infer_request() const {
return ov::legacy_convert::convert_infer_request(m_model->CreateInferRequest());
}
void InferenceEngine::ICompiledModelWrapper::export_model(std::ostream& model) const {

View File

@ -13,7 +13,7 @@ namespace InferenceEngine {
class ICompiledModelWrapper : public ov::ICompiledModel {
public:
ICompiledModelWrapper(const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model);
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
void export_model(std::ostream& model) const override;
@ -30,7 +30,7 @@ public:
private:
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> m_model;
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override {
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override {
OPENVINO_NOT_IMPLEMENTED;
}
};

View File

@ -0,0 +1,238 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/isync_infer_request.hpp"
#include "cpp_interfaces/plugin_itt.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/layout.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"
namespace {
void check_batched_tensors(const ov::Output<const ov::Node>& input, const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT(!tensors.empty(), "set_input_tensors/set_tensors can't be called with empty tensors");
OPENVINO_ASSERT(
tensors.size() != 1,
"Internal error (plugin): check_batched_tensors is not allowed to have only one tensor inside batch");
auto layout = ov::layout::get_layout(input);
OPENVINO_ASSERT(ov::layout::has_batch(layout),
"set_input_tensors/set_tensors can be used only for inputs with N(batch) dimension"
" 'layout' defined. Current layout is ",
layout.to_string());
auto batch_idx = ov::layout::batch_idx(layout);
if (batch_idx < 0) {
// TODO: Do we need this logic?
batch_idx += static_cast<int64_t>(tensors[0].get_shape().size());
}
OPENVINO_ASSERT(batch_idx == 0,
"set_input_tensors/set_tensors is not currently supported for batch dimension index ",
batch_idx,
" != 0");
std::for_each(tensors.begin(), tensors.end(), [&batch_idx](const ov::Tensor& item) {
OPENVINO_ASSERT(item.get_shape()[batch_idx] == 1,
"set_input_tensors/set_tensors. Tensors shall represent one item in a batch, ",
item.get_shape()[batch_idx],
" provided");
});
auto tensors_size = static_cast<int>(tensors.size());
if (input.get_partial_shape().rank().is_static()) {
OPENVINO_ASSERT(batch_idx >= 0 && batch_idx < input.get_partial_shape().rank().get_length(),
"set_input_tensors/set_tensors error. Layout ",
layout.to_string(),
" is incorrect for operation with shape ",
input.get_partial_shape());
auto batch = input.get_partial_shape()[batch_idx];
OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
"set_input_tensors/set_tensors error. Input shape ",
input.get_partial_shape(),
"batch ",
batch,
"doesn't match with total blobs count: ",
tensors_size);
}
// In future consider checking if blobs point to contiguous range of memory and use single 'SetBlob' instead
auto batched_shape = tensors[0].get_shape();
auto element_type = tensors[0].get_element_type();
batched_shape[batch_idx] = tensors_size;
for (const auto& item : tensors) {
auto item_shape = item.get_shape();
item_shape[batch_idx] = batched_shape[batch_idx];
OPENVINO_ASSERT(item_shape == batched_shape && item.get_element_type() == element_type,
"set_input_tensors/set_tensors error. Tensor with element type ",
item.get_element_type(),
" and shape ",
item_shape,
" is not compatible with batched tensor with element type ",
element_type,
" and shape ",
batched_shape);
}
}
} // namespace
ov::IInferRequest::~IInferRequest() = default;
ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
: m_compiled_model(compiled_model) {}
const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_inputs() const {
return m_compiled_model->inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_outputs() const {
return m_compiled_model->outputs();
}
const std::shared_ptr<ov::ICompiledModel>& ov::ISyncInferRequest::get_compiled_model() const {
return m_compiled_model;
}
ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Output<const ov::Node>& port) const {
ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT;
for (const auto& ports : {get_inputs(), get_outputs()}) {
for (size_t i = 0; i < ports.size(); i++) {
if (ports[i] == port) {
return {i, type};
}
}
type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT;
}
return {0, ov::ISyncInferRequest::FoundPort::Type::NOT_FOUND};
}
void ov::ISyncInferRequest::convert_batched_tensors() {
for (const auto& item : m_batched_tensors) {
auto tmp_shape = item.second.at(0).get_shape();
auto tmp_et = item.second.at(0).get_element_type();
tmp_shape[0] = item.second.size();
ov::RemoteContext remote_context;
ov::Tensor input_tensor;
try {
auto net = get_compiled_model();
if (net) {
remote_context = net->get_context();
}
} catch (const ov::NotImplemented&) {
}
if (remote_context._impl) {
input_tensor = remote_context.create_host_tensor(tmp_et, tmp_shape);
} else {
input_tensor = ov::Tensor(tmp_et, tmp_shape);
}
auto ptr = input_tensor.data<uint8_t>();
// Perform memory copy
ov::parallel_for(item.second.size(), [&](size_t i) {
const auto& tensor = item.second.at(i);
memcpy(ptr + i * tensor.get_byte_size(), tensor.data<uint8_t>(), tensor.get_byte_size());
});
set_tensor(get_inputs()[item.first], input_tensor);
}
}
ov::Tensor ov::ISyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
if (found_port.is_input()) {
auto input = m_compiled_model->inputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (input.get_partial_shape().is_dynamic())
return m_input_tensors.at(found_port.idx);
}
auto output = m_compiled_model->outputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (output.get_partial_shape().is_dynamic())
return m_output_tensors.at(found_port.idx);
}
void ov::ISyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
OPENVINO_ASSERT(
port.get_element_type() == tensor.get_element_type(),
"Failed to set output tensor, the tensor element type is not corresponding with output element type");
OPENVINO_ASSERT(port.get_partial_shape().is_dynamic() || tensor.get_shape() == port.get_shape(),
"Input tensor size is not equal with model input size (",
tensor.get_shape(),
" != ",
port.get_shape(),
").");
if (found_port.is_input()) {
m_input_tensors.at(found_port.idx) = tensor;
m_batched_tensors.erase(found_port.idx);
} else {
m_output_tensors.at(found_port.idx) = tensor;
}
}
std::vector<ov::Tensor> ov::ISyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (m_batched_tensors.count(found_port.idx))
return m_batched_tensors.at(found_port.idx);
return {};
}
void ov::ISyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (tensors.size() == 1) {
set_tensor(port, tensors[0]);
return;
}
check_batched_tensors(port, tensors);
set_tensors_impl(port, tensors);
}
void ov::ISyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT_HELPER(::ov::NotImplemented,
"",
false,
"Not Implemented",
"set_input_tensors/set_tensors are not supported by this plugin");
}
void ov::ISyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const {
bool is_input = ov::op::util::is_parameter(port.get_node());
std::string tensor_type = is_input ? "input" : "output";
bool is_dynamic = port.get_partial_shape().is_dynamic();
OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor.get_shape(),
"The ",
tensor_type,
" tensor size is not equal to the model ",
tensor_type,
" type: got ",
tensor.get_size(),
" expecting ",
port.get_shape(),
".");
}
void ov::ISyncInferRequest::check_tensors() const {
const auto& inputs = m_compiled_model->inputs();
for (size_t i = 0; i < inputs.size(); i++) {
check_tensor(inputs[i], m_input_tensors[i]);
}
const auto& outputs = m_compiled_model->outputs();
for (size_t i = 0; i < outputs.size(); i++) {
check_tensor(outputs[i], m_output_tensors[i]);
}
}
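
For orientation, a hedged sketch of how the batched-input path above is typically reached from application code; the model path, device name, shapes, and the assumptions that the model has a single input carrying a layout with 'N' at dimension 0 and that the target plugin implements set_tensors_impl() are all illustrative, not taken from this commit.

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // hypothetical model/device
    auto request = compiled.create_infer_request();

    // One tensor per batch element; each has 1 in the batch dimension,
    // which is exactly what check_batched_tensors() above validates.
    std::vector<ov::Tensor> items;
    for (size_t i = 0; i < 4; ++i)
        items.emplace_back(ov::element::f32, ov::Shape{1, 3, 224, 224});

    // Routed through InferRequest::set_input_tensors -> ISyncInferRequest::set_tensors;
    // a plugin's set_tensors_impl() typically stores the list (e.g. in m_batched_tensors)
    // and convert_batched_tensors() later merges it into a single batched input.
    request.set_input_tensors(items);
    request.infer();
    return 0;
}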

View File

@ -0,0 +1,295 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/infer_request.hpp"
#include <map>
#include <memory>
#include <string>
#include "ie_common.h"
#include "openvino/core/node.hpp"
#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "transformations/utils/utils.hpp"
#define OV_INFER_REQ_CALL_STATEMENT(...) \
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
try { \
__VA_ARGS__; \
} catch (const ::InferenceEngine::RequestBusy& ex) { \
throw ov::Busy(ex.what()); \
} catch (const std::exception& ex) { \
throw ov::Exception(ex.what()); \
} catch (...) { \
OPENVINO_ASSERT(false, "Unexpected exception"); \
}
namespace {
inline bool getPort(ov::Output<const ov::Node>& res_port,
const std::string& name,
const std::vector<std::vector<ov::Output<const ov::Node>>>& vector_ports) {
for (const auto& ports : vector_ports) {
for (const auto& port : ports) {
const auto& names = port.get_names();
if (names.find(name) != names.end()) {
res_port = port;
return true;
}
}
}
return false;
}
} // namespace
namespace ov {
InferRequest::~InferRequest() {
_impl = {};
}
InferRequest::InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so)
: _impl{impl},
_so{so} {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
}
void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensor(port, tensor); });
}
void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
}
void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
set_tensor(port, tensor);
});
}
void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs()}),
"set_tensors error. Input port for tensor name ",
name,
" was not found.");
set_tensors(port, tensors);
})
}
void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensors(port, tensors); })
}
void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() > idx,
"Input port for index ",
idx,
" was not found! The model has only ",
inputs.size(),
" inputs.");
set_tensor(inputs.at(idx), tensor);
});
}
void InferRequest::set_input_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() == 1,
"set_input_tensor() must be called on a function with exactly one parameter.");
set_tensor(inputs.at(0), tensor);
});
}
void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(idx < _impl->get_inputs().size(),
"set_input_tensors error. Input port for index ",
idx,
" is out of bounds. Model has only ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(idx), tensors);
})
}
void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_inputs().size() == 1,
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(0), tensors);
})
}
void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() > idx,
"Output port for index ",
idx,
" was not found! The model has only ",
outputs.size(),
" outputs.");
set_tensor(outputs.at(idx), tensor);
});
}
void InferRequest::set_output_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() == 1,
"set_output_tensor() must be called on a function with exactly one parameter.");
set_tensor(outputs.at(0), tensor);
});
}
Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_tensors(port).empty(),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for port '",
port,
"'");
auto tensor = _impl->get_tensor(port);
tensor._so.emplace_back(_so);
return tensor;
});
}
Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
}
Tensor InferRequest::get_tensor(const std::string& name) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
return get_tensor(port);
});
}
Tensor InferRequest::get_input_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_inputs().at(idx)); });
}
Tensor InferRequest::get_output_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_outputs().at(idx)); });
}
Tensor InferRequest::get_input_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->get_inputs();
if (inputs.size() != 1) {
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(inputs.at(0));
});
}
Tensor InferRequest::get_output_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->get_outputs();
if (outputs.size() != 1) {
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(outputs.at(0));
});
}
void InferRequest::infer() {
OV_INFER_REQ_CALL_STATEMENT(_impl->infer());
}
void InferRequest::cancel() {
OV_INFER_REQ_CALL_STATEMENT(_impl->cancel());
}
std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
OV_INFER_REQ_CALL_STATEMENT(return _impl->get_profiling_info());
}
void InferRequest::start_async() {
OV_INFER_REQ_CALL_STATEMENT(_impl->start_async());
}
void InferRequest::wait() {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
_impl->wait();
} catch (const ov::Cancelled&) {
throw;
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
return _impl->wait_for(timeout);
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
OV_INFER_REQ_CALL_STATEMENT(_impl->set_callback(std::move(callback));)
}
std::vector<VariableState> InferRequest::query_state() {
std::vector<VariableState> variable_states;
OV_INFER_REQ_CALL_STATEMENT({
for (auto&& state : _impl->query_state()) {
auto soVec = state._so;
soVec.emplace_back(_so);
variable_states.emplace_back(ov::VariableState{state._impl, soVec});
}
})
return variable_states;
}
CompiledModel InferRequest::get_compiled_model() {
OV_INFER_REQ_CALL_STATEMENT(return {_impl->get_compiled_model(), _so});
}
bool InferRequest::operator!() const noexcept {
return !_impl;
}
InferRequest::operator bool() const noexcept {
return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
return !(r == *this);
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl;
}
} // namespace ov
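
Finally, a short end-to-end sketch of the asynchronous public API implemented above; the model path, device name, tensor shape, and the single-input/single-output assumption are placeholders for illustration.

#include <chrono>
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // hypothetical model/device
    auto request = compiled.create_infer_request();

    ov::Tensor input(ov::element::f32, ov::Shape{1, 3, 224, 224});
    request.set_input_tensor(input);  // single-input model assumed

    // The callback receives the exception pointer produced by the last pipeline stage.
    request.set_callback([](std::exception_ptr ex) {
        if (ex)
            std::cerr << "Inference failed" << std::endl;
    });

    request.start_async();
    if (!request.wait_for(std::chrono::milliseconds(5))) {
        // Not finished within 5 ms; block until the result is ready.
        request.wait();
    }

    auto output = request.get_output_tensor();  // single-output model assumed
    std::cout << "Output element count: " << output.get_size() << std::endl;
    return 0;
}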

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -26,6 +26,7 @@ ov_mark_target_as_cc(${TARGET_NAME})
target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${OpenVINO_SOURCE_DIR}/src/inference/src/dev" # TODO: remove after migration to new infer request
"${TEMPLATE_PLUGIN_SOURCE_DIR}/include")
# link common Inference Engine libraries

View File

@ -6,8 +6,12 @@
#include <memory>
#include "converter_utils.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "plugin.hpp"
#include "template/config.hpp"
#include "template_async_infer_request.hpp"
@ -124,17 +128,8 @@ void TemplatePlugin::CompiledModel::compile_model(const std::shared_ptr<ov::Mode
// ! [executable_network:map_graph]
// ! [executable_network:create_infer_request]
std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_infer_request() const {
auto internal_request = create_sync_infer_request();
return std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_sync_infer_request()
const {
std::shared_ptr<ov::IAsyncInferRequest> TemplatePlugin::CompiledModel::create_infer_request() const {
// auto internal_request = create_sync_infer_request();
std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
for (const auto& output : m_model->inputs()) {
_inputs.emplace_back(output.get_node_shared_ptr());
@ -143,10 +138,36 @@ std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::Compiled
_outputs.emplace_back(output.get_node_shared_ptr());
}
return std::make_shared<TemplateInferRequest>(
auto internal_request = std::make_shared<TemplateInferRequest>(
_inputs,
_outputs,
std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
auto async_infer_request = std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());
async_infer_request->setPointerToExecutableNetworkInternal(
ov::legacy_convert::convert_compiled_model(std::const_pointer_cast<ov::ICompiledModel>(shared_from_this())));
return ov::legacy_convert::convert_infer_request(async_infer_request);
}
std::shared_ptr<ov::ISyncInferRequest> TemplatePlugin::CompiledModel::create_sync_infer_request() const {
OPENVINO_NOT_IMPLEMENTED;
// std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
// for (const auto& output : m_model->inputs()) {
// _inputs.emplace_back(output.get_node_shared_ptr());
// }
// for (const auto& output : outputs()) {
// _outputs.emplace_back(output.get_node_shared_ptr());
// }
//
// return std::make_shared<TemplateInferRequest>(
// _inputs,
// _outputs,
// std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
}
// ! [executable_network:create_infer_request]

View File

@ -5,6 +5,9 @@
#pragma once
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/tensor.hpp"
#include "template_config.hpp"
#include "template_infer_request.hpp"
@ -34,10 +37,10 @@ public:
virtual ov::Any get_property(const std::string& name) const override;
ov::RemoteContext get_context() const override;
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
protected:
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override;
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
private:
friend class TemplateInferRequest;