Added classes for Sync and Async infer requests (#15387)
* Added classes for Sync and Async infer requests
* Changed hierarchy of Infer requests
* Fixed code style
* Fixed some tests
* Fixed naming style
* Fixed template plugin build
* Temporary disable python tests
* Revert "Temporary disable python tests" (this reverts commit c9aa9d79f8)
* Fixed template plugin tests
* Disable python tests
* Disable more steps
* Merged CI
* Revert "Merged CI" (this reverts commit 2f69574870)
* Try to fix segfault in python tests
* Remove default constructor
* Fixed documentation
* Fixed CPU tests
* Fixed Windows build
* Fixed comments
* Fixed build
This commit is contained in:
parent 609dee0abc
commit b80d05e0e1
@ -19,6 +19,7 @@
namespace InferenceEngine {
class Blob;
class IAsyncInferRequestWrapper;
} // namespace InferenceEngine

namespace ov {
@ -28,6 +29,7 @@ class CoreImpl;
class InferRequest;
class RemoteContext;
class VariableState;
class IInferRequestInternalWrapper;

/**
* @brief Tensor API holding host memory
@ -52,6 +54,8 @@ protected:
friend class ov::InferRequest;
friend class ov::RemoteContext;
friend class ov::VariableState;
friend class ov::IInferRequestInternalWrapper;
friend class InferenceEngine::IAsyncInferRequestWrapper;

public:
/// @brief Default constructor
276 src/inference/dev_api/openvino/runtime/iasync_infer_request.hpp Normal file
@ -0,0 +1,276 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
* @brief OpenVINO Runtime AsyncInferRequest interface
* @file openvino/runtime/iasync_infer_request.hpp
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <memory>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/exception.hpp"
|
||||
#include "openvino/runtime/iinfer_request.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
* @brief Base class with a default implementation of an asynchronous, multi-staged inference request.
* To customize the pipeline stages, a derived class should change the content
* of the IAsyncInferRequest::m_pipeline member container.
* It consists of pairs of tasks and the executors that will run them.
* The class is recommended as a base class for plugins' asynchronous inference request
* implementations.
* @note To synchronize the derived context with the stages,
* the derived class should call IAsyncInferRequest::stop_and_wait() in its destructor.
* @par Example
* Here is an example of an asynchronous inference request implementation for some accelerator device.
* It uses 5 different executors to run different stages of a synchronous inference request.
*/
class OPENVINO_RUNTIME_API IAsyncInferRequest : public IInferRequest {
|
||||
public:
|
||||
IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callback_executor);
|
||||
~IAsyncInferRequest();
|
||||
|
||||
/**
* @brief Starts inference of specified input(s) in asynchronous mode
* @note The method returns immediately. Inference starts also immediately.
*/
virtual void start_async();

/**
* @brief Waits for the result to become available.
*/
virtual void wait();
/**
* @brief Waits for the result to become available. Blocks until the specified timeout has elapsed or the result
* becomes available, whichever comes first.
* @param timeout - maximum duration in milliseconds to block for
* @return True if the result became available within the timeout, false otherwise.
*/
virtual bool wait_for(const std::chrono::milliseconds& timeout);
|
||||
|
||||
/**
|
||||
* @brief Cancel current inference request execution
|
||||
*/
|
||||
virtual void cancel();
|
||||
|
||||
/**
* @brief Sets a callback function that will be called on success or failure of the asynchronous request
* @param callback - callback object that receives a null std::exception_ptr on success or the thrown exception on failure
*/
virtual void set_callback(std::function<void(std::exception_ptr)> callback);

/**
* @brief Infers specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
void infer() override;
|
||||
|
||||
/**
|
||||
* @brief Queries performance measures per layer to identify the most time consuming operation.
|
||||
* @note Not all plugins provide meaningful data.
|
||||
* @return Vector of profiling information for operations in a model.
|
||||
*/
|
||||
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets an input/output tensor for inference.
|
||||
* @note If the tensor with the specified @p port is not found, an exception is thrown.
|
||||
* @param port Port of the tensor to get.
|
||||
* @return Tensor for the port @p port.
|
||||
*/
|
||||
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
|
||||
|
||||
/**
|
||||
* @brief Sets an input/output tensor to infer.
|
||||
* @param port Port of the input or output tensor.
|
||||
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
|
||||
* the model's input/output element_type and size.
|
||||
*/
|
||||
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
|
||||
|
||||
/**
|
||||
* @brief Gets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
* @return vector of tensors
|
||||
*/
|
||||
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
|
||||
/**
|
||||
* @brief Sets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
*/
|
||||
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
|
||||
|
||||
/**
|
||||
* @brief Gets state control interface for the given infer request.
|
||||
*
|
||||
* State control essential for recurrent models.
|
||||
* @return Vector of Variable State objects.
|
||||
*/
|
||||
std::vector<ov::VariableState> query_state() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
|
||||
*
|
||||
* @return Pointer to the compiled model
|
||||
*/
|
||||
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets inputs for infer request
|
||||
*
|
||||
* @return vector of input ports
|
||||
*/
|
||||
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets outputs for infer request
|
||||
*
|
||||
* @return vector of output ports
|
||||
*/
|
||||
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
|
||||
|
||||
protected:
|
||||
using Stage = std::pair<InferenceEngine::ITaskExecutor::Ptr, InferenceEngine::Task>;
|
||||
/**
|
||||
* @brief Pipeline is vector of stages
|
||||
*/
|
||||
using Pipeline = std::vector<Stage>;
|
||||
|
||||
/**
* @brief Forbids pipeline start and waits for all started pipelines.
* @note Should be called in the derived class destructor to wait until all pipeline tasks that capture the
* derived context have completed
*/
void stop_and_wait();

/**
* @brief Throws an exception if the inference request is busy or canceled
*/
void check_state() const;
/**
* @brief Performs inference of the pipeline in synchronous mode
* @note Used by infer(), which ensures thread safety and then calls this method.
*/
virtual void infer_thread_unsafe();
/**
* @brief Starts the asynchronous pipeline (thread-unsafe).
* @note Used by start_async(), which ensures thread safety and then calls this method.
*/
virtual void start_async_thread_unsafe();
|
||||
/**
|
||||
* @brief Check that all tensors are valid. Throws an exception if it's not.
|
||||
*/
|
||||
void check_tensors() const override;
|
||||
|
||||
Pipeline m_pipeline; //!< Pipeline variable that should be filled by inherited class.
|
||||
Pipeline m_sync_pipeline; //!< Synchronous pipeline variable that should be filled by inherited class.
|
||||
|
||||
private:
|
||||
enum InferState { IDLE, BUSY, CANCELLED, STOP };
|
||||
using Futures = std::vector<std::shared_future<void>>;
|
||||
enum Stage_e : std::uint8_t { EXECUTOR, TASK };
|
||||
InferState m_state = InferState::IDLE;
|
||||
Futures m_futures;
|
||||
std::promise<void> m_promise;
|
||||
|
||||
friend struct DisableCallbackGuard;
|
||||
struct DisableCallbackGuard {
|
||||
explicit DisableCallbackGuard(IAsyncInferRequest* this_) : _this{this_} {
|
||||
std::lock_guard<std::mutex> lock{_this->m_mutex};
|
||||
std::swap(m_callback, _this->m_callback);
|
||||
}
|
||||
~DisableCallbackGuard() {
|
||||
std::lock_guard<std::mutex> lock{_this->m_mutex};
|
||||
_this->m_callback = m_callback;
|
||||
}
|
||||
IAsyncInferRequest* _this = nullptr;
|
||||
std::function<void(std::exception_ptr)> m_callback;
|
||||
};
|
||||
|
||||
void run_first_stage(const Pipeline::iterator itBeginStage,
|
||||
const Pipeline::iterator itEndStage,
|
||||
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor = {});
|
||||
|
||||
InferenceEngine::Task make_next_stage_task(const Pipeline::iterator itStage,
|
||||
const Pipeline::iterator itEndStage,
|
||||
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor);
|
||||
|
||||
template <typename F>
|
||||
void infer_impl(const F& f) {
|
||||
check_tensors();
|
||||
InferState state = InferState::IDLE;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{m_mutex};
|
||||
state = m_state;
|
||||
switch (m_state) {
|
||||
case InferState::BUSY:
|
||||
throw ov::Busy("Infer Request is busy");
|
||||
case InferState::CANCELLED:
|
||||
throw ov::Cancelled("Infer Request was canceled");
|
||||
case InferState::IDLE: {
|
||||
m_futures.erase(std::remove_if(std::begin(m_futures),
|
||||
std::end(m_futures),
|
||||
[](const std::shared_future<void>& future) {
|
||||
if (future.valid()) {
|
||||
return (std::future_status::ready ==
|
||||
future.wait_for(std::chrono::milliseconds{0}));
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}),
|
||||
m_futures.end());
|
||||
m_promise = {};
|
||||
m_futures.emplace_back(m_promise.get_future().share());
|
||||
} break;
|
||||
case InferState::STOP:
|
||||
break;
|
||||
}
|
||||
m_state = InferState::BUSY;
|
||||
}
|
||||
if (state != InferState::STOP) {
|
||||
try {
|
||||
f();
|
||||
} catch (...) {
|
||||
m_promise.set_exception(std::current_exception());
|
||||
std::lock_guard<std::mutex> lock{m_mutex};
|
||||
m_state = InferState::IDLE;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<IInferRequest> m_sync_request;
|
||||
|
||||
InferenceEngine::ITaskExecutor::Ptr m_request_executor; //!< Used to run inference CPU tasks.
InferenceEngine::ITaskExecutor::Ptr
m_callback_executor; //!< Used to run the post-inference callback in the asynchronous pipeline
InferenceEngine::ITaskExecutor::Ptr
m_sync_callback_executor; //!< Used to run the post-inference callback in the synchronous pipeline
|
||||
mutable std::mutex m_mutex;
|
||||
std::function<void(std::exception_ptr)> m_callback;
|
||||
};
|
||||
|
||||
} // namespace ov
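For illustration only (not part of this commit): a minimal sketch of how a plugin might derive from IAsyncInferRequest and fill m_pipeline. MyDeviceSyncInferRequest, preprocess(), run_on_device() and the extra wait executor are hypothetical names.

class MyDeviceAsyncInferRequest : public ov::IAsyncInferRequest {
public:
    MyDeviceAsyncInferRequest(const std::shared_ptr<MyDeviceSyncInferRequest>& sync_request,
                              const InferenceEngine::ITaskExecutor::Ptr& task_executor,
                              const InferenceEngine::ITaskExecutor::Ptr& wait_executor,
                              const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
        : ov::IAsyncInferRequest(sync_request, task_executor, callback_executor) {
        // Each stage is an {executor, task} pair; stages run in order, each task on its executor.
        m_pipeline = {{task_executor, [sync_request] { sync_request->preprocess(); }},
                      {wait_executor, [sync_request] { sync_request->run_on_device(); }}};
    }
    ~MyDeviceAsyncInferRequest() {
        // Base-class contract: wait for pipeline tasks that capture the derived context.
        stop_and_wait();
    }
};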
|
@ -13,15 +13,14 @@
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/isync_infer_request.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
class IInferRequestInternal;
|
||||
class ICompiledModelWrapper;
|
||||
} // namespace InferenceEngine
|
||||
|
||||
@ -30,6 +29,7 @@ namespace ov {
|
||||
class CoreImpl;
|
||||
class IPlugin;
|
||||
class IExecutableNetworkWrapper;
|
||||
class IAsyncInferRequest;
|
||||
|
||||
/**
|
||||
* @brief OpenVINO ICompiledModel interface
|
||||
@ -73,9 +73,9 @@ public:
|
||||
/**
|
||||
* @brief Create infer request
|
||||
*
|
||||
* @return Infer request interface
|
||||
* @return Asynchronous infer request interface
|
||||
*/
|
||||
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const;
|
||||
virtual std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const;
|
||||
|
||||
/**
|
||||
* @brief Export compiled model to stream
|
||||
@ -141,7 +141,7 @@ protected:
|
||||
*
|
||||
* @return Sync infer request
|
||||
*/
|
||||
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const = 0;
|
||||
virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const = 0;
|
||||
|
||||
/**
|
||||
* @brief Default implementation of the create async infer request method
|
||||
@ -149,11 +149,11 @@ protected:
|
||||
* @tparam AsyncInferRequestType Async infer request type. InferenceEngine::AsyncInferRequestThreadSafeDefault by
|
||||
* default
|
||||
*
|
||||
* @return Async infer request
|
||||
* @return Asynchronous infer request
|
||||
*/
|
||||
template <typename AsyncInferRequestType = InferenceEngine::AsyncInferRequestThreadSafeDefault>
|
||||
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_async_infer_request() const {
|
||||
std::shared_ptr<InferenceEngine::IInferRequestInternal> syncRequestImpl = this->create_sync_infer_request();
|
||||
template <typename AsyncInferRequestType = ov::IAsyncInferRequest>
|
||||
std::shared_ptr<ov::IAsyncInferRequest> create_async_infer_request() const {
|
||||
auto syncRequestImpl = create_sync_infer_request();
|
||||
return std::make_shared<AsyncInferRequestType>(syncRequestImpl, m_task_executor, m_callback_executor);
|
||||
}
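For context, a hedged sketch of how a plugin's compiled model (hypothetical MyDeviceCompiledModel) could rely on this default helper; create_sync_infer_request() is the only piece the plugin must provide.

std::shared_ptr<ov::IAsyncInferRequest> MyDeviceCompiledModel::create_infer_request() const {
    // The helper above builds the synchronous request via create_sync_infer_request() and wraps it
    // with the default ov::IAsyncInferRequest using m_task_executor / m_callback_executor.
    return create_async_infer_request();
}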
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <memory>
|
||||
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
|
||||
|
121 src/inference/dev_api/openvino/runtime/iinfer_request.hpp Normal file
@ -0,0 +1,121 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief OpenVINO Runtime InferRequest interface
|
||||
* @file openvino/runtime/iinfer_request.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
class IAsyncInferRequest;
|
||||
class ICompiledModel;
|
||||
|
||||
class OPENVINO_RUNTIME_API IInferRequest {
|
||||
public:
|
||||
virtual ~IInferRequest();
|
||||
|
||||
/**
* @brief Infers specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
virtual void infer() = 0;
|
||||
|
||||
/**
|
||||
* @brief Queries performance measures per layer to identify the most time consuming operation.
|
||||
* @note Not all plugins provide meaningful data.
|
||||
* @return Vector of profiling information for operations in a model.
|
||||
*/
|
||||
virtual std::vector<ov::ProfilingInfo> get_profiling_info() const = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets an input/output tensor for inference.
|
||||
* @note If the tensor with the specified @p port is not found, an exception is thrown.
|
||||
* @param port Port of the tensor to get.
|
||||
* @return Tensor for the port @p port.
|
||||
*/
|
||||
virtual ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const = 0;
|
||||
|
||||
/**
|
||||
* @brief Sets an input/output tensor to infer.
|
||||
* @param port Port of the input or output tensor.
|
||||
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
|
||||
* the model's input/output element_type and size.
|
||||
*/
|
||||
virtual void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
* @return vector of tensors
|
||||
*/
|
||||
virtual std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const = 0;
|
||||
|
||||
/**
|
||||
* @brief Sets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
*/
|
||||
virtual void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets state control interface for the given infer request.
|
||||
*
|
||||
* State control essential for recurrent models.
|
||||
* @return Vector of Variable State objects.
|
||||
*/
|
||||
virtual std::vector<ov::VariableState> query_state() const = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
|
||||
*
|
||||
* @return Pointer to the compiled model
|
||||
*/
|
||||
virtual const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets inputs for infer request
|
||||
*
|
||||
* @return vector of input ports
|
||||
*/
|
||||
virtual const std::vector<ov::Output<const ov::Node>>& get_inputs() const = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets outputs for infer request
|
||||
*
|
||||
* @return vector of output ports
|
||||
*/
|
||||
virtual const std::vector<ov::Output<const ov::Node>>& get_outputs() const = 0;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Check that all tensors are valid. Throws an exception if it's not.
|
||||
*/
|
||||
virtual void check_tensors() const = 0;
|
||||
friend IAsyncInferRequest;
|
||||
};
|
||||
|
||||
} // namespace ov
|
153 src/inference/dev_api/openvino/runtime/isync_infer_request.hpp Normal file
@ -0,0 +1,153 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief OpenVINO Runtime InferRequest interface
|
||||
* @file openvino/runtime/isync_infer_request.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <openvino/runtime/tensor.hpp>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/iinfer_request.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
* @brief Interface for a synchronous infer request
*/
class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest {
public:
/**
* @brief Constructs a synchronous inference request
*
* @param compiled_model pointer to the compiled model
*/
ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model);
|
||||
|
||||
/**
|
||||
* @brief Gets an input/output tensor for inference.
|
||||
* @note If the tensor with the specified @p port is not found, an exception is thrown.
|
||||
* @param port Port of the tensor to get.
|
||||
* @return Tensor for the port @p port.
|
||||
*/
|
||||
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
|
||||
|
||||
/**
|
||||
* @brief Sets an input/output tensor to infer.
|
||||
* @param port Port of the input or output tensor.
|
||||
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
|
||||
* the model's input/output element_type and size.
|
||||
*/
|
||||
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
|
||||
|
||||
/**
|
||||
* @brief Gets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
* @return vector of tensors
|
||||
*/
|
||||
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
|
||||
|
||||
/**
|
||||
* @brief Sets a batch of tensors for input data to infer by input port.
|
||||
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
|
||||
* The current version supports setting tensors to model inputs only. If @p port is associated
|
||||
* with output (or any other non-input node), an exception is thrown.
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
*/
|
||||
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
|
||||
|
||||
/**
|
||||
* @brief Plugin implementation for set tensors
|
||||
*
|
||||
* @param port Port of the input tensor.
|
||||
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
|
||||
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
|
||||
*/
|
||||
virtual void set_tensors_impl(const ov::Output<const ov::Node> port, const std::vector<ov::Tensor>& tensors);
|
||||
|
||||
/**
|
||||
* @brief Gets inputs for infer request
|
||||
*
|
||||
* @return vector of input ports
|
||||
*/
|
||||
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets outputs for infer request
|
||||
*
|
||||
* @return vector of output ports
|
||||
*/
|
||||
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
|
||||
|
||||
/**
|
||||
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
|
||||
*
|
||||
* @return Pointer to the compiled model
|
||||
*/
|
||||
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
|
||||
|
||||
protected:
|
||||
struct FoundPort {
|
||||
size_t idx;
|
||||
enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;
|
||||
|
||||
bool found() {
|
||||
return type != Type::NOT_FOUND;
|
||||
}
|
||||
bool is_input() {
|
||||
return type == Type::INPUT;
|
||||
}
|
||||
bool is_output() {
|
||||
return !is_input();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Finds input or output port
|
||||
* @return structure which contains index of Input/Output or report that port wasn't found
|
||||
*/
|
||||
FoundPort find_port(const ov::Output<const ov::Node>& port) const;
|
||||
/**
|
||||
* @brief Converts batched tensors to tensor
|
||||
*/
|
||||
void convert_batched_tensors();
|
||||
/**
|
||||
* @brief Basic checks for input/output tensor
|
||||
*
|
||||
* @param port Input/Output port
|
||||
* @param tensor Input/Output tensor
|
||||
*/
|
||||
void check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const;
|
||||
|
||||
/**
|
||||
* @brief Check that all tensors are valid. Throws an exception if it's not.
|
||||
*/
|
||||
void check_tensors() const override;
|
||||
|
||||
std::vector<ov::Tensor> m_input_tensors;
|
||||
std::vector<ov::Tensor> m_output_tensors;
|
||||
std::unordered_map<size_t, std::vector<ov::Tensor>> m_batched_tensors;
|
||||
|
||||
private:
|
||||
std::shared_ptr<ov::ICompiledModel> m_compiled_model;
|
||||
};
|
||||
|
||||
} // namespace ov
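As an illustration only, a minimal sketch of a plugin-side synchronous request built on this interface. MyDeviceSyncInferRequest and run_on_device() are hypothetical; infer(), get_profiling_info() and query_state() are the methods left pure by the base classes.

class MyDeviceSyncInferRequest : public ov::ISyncInferRequest {
public:
    explicit MyDeviceSyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
        : ov::ISyncInferRequest(compiled_model) {}

    void infer() override {
        check_tensors();            // validate user-provided tensors against the model ports
        convert_batched_tensors();  // merge tensors set via set_tensors() into single input tensors
        run_on_device(m_input_tensors, m_output_tensors);
    }

    std::vector<ov::ProfilingInfo> get_profiling_info() const override { return {}; }
    std::vector<ov::VariableState> query_state() const override { return {}; }

private:
    // Hypothetical device call consuming the prepared inputs and filling the outputs.
    void run_on_device(const std::vector<ov::Tensor>& inputs, std::vector<ov::Tensor>& outputs);
};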
|
@ -19,20 +19,17 @@
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
class IInferRequestInternal;
|
||||
} // namespace InferenceEngine
|
||||
|
||||
namespace ov {
|
||||
|
||||
class CompiledModel;
|
||||
class IAsyncInferRequest;
|
||||
|
||||
/**
|
||||
* @brief This is a class of infer request that can be run in asynchronous or synchronous manners.
|
||||
* @ingroup ov_runtime_cpp_api
|
||||
*/
|
||||
class OPENVINO_RUNTIME_API InferRequest {
|
||||
std::shared_ptr<InferenceEngine::IInferRequestInternal> _impl;
|
||||
std::shared_ptr<ov::IAsyncInferRequest> _impl;
|
||||
std::shared_ptr<void> _so;
|
||||
|
||||
/**
|
||||
@ -41,7 +38,7 @@ class OPENVINO_RUNTIME_API InferRequest {
|
||||
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if a plugin object
|
||||
* is destroyed.
|
||||
*/
|
||||
InferRequest(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& impl, const std::shared_ptr<void>& so);
|
||||
InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so);
|
||||
friend class ov::CompiledModel;
|
||||
|
||||
public:
|
||||
|
@ -31,6 +31,7 @@ class Core;
|
||||
class CoreImpl;
|
||||
class Plugin;
|
||||
class IPlugin;
|
||||
class ISyncInferRequest;
|
||||
class IInferencePluginWrapper;
|
||||
class IExecutableNetworkWrapper;
|
||||
class CompiledModel;
|
||||
@ -62,6 +63,7 @@ protected:
|
||||
friend class ov::CoreImpl;
|
||||
friend class ov::Plugin;
|
||||
friend class ov::IPlugin;
|
||||
friend class ov::ISyncInferRequest;
|
||||
friend class ov::IInferencePluginWrapper;
|
||||
friend class ov::IExecutableNetworkWrapper;
|
||||
friend class ov::CompiledModel;
|
||||
|
@ -17,11 +17,13 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
class IVariableStateInternal;
|
||||
class IAsyncInferRequestWrapper;
|
||||
} // namespace InferenceEngine
|
||||
|
||||
namespace ov {
|
||||
|
||||
class InferRequest;
|
||||
class IInferRequestInternalWrapper;
|
||||
|
||||
/**
|
||||
* @brief VariableState class
|
||||
@ -41,6 +43,8 @@ class OPENVINO_RUNTIME_API VariableState {
|
||||
const std::vector<std::shared_ptr<void>>& so);
|
||||
|
||||
friend class ov::InferRequest;
|
||||
friend class ov::IInferRequestInternalWrapper;
|
||||
friend class InferenceEngine::IAsyncInferRequestWrapper;
|
||||
|
||||
public:
|
||||
/**
|
||||
|
@ -19,24 +19,6 @@
|
||||
#include "openvino/runtime/infer_request.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
inline bool getPort(ov::Output<const ov::Node>& port,
|
||||
const std::string& name,
|
||||
const std::vector<std::vector<std::shared_ptr<const ov::Node>>>& ports) {
|
||||
for (const auto& nodes : ports) {
|
||||
for (const auto& node : nodes) {
|
||||
const auto& names = node->get_output_tensor(0).get_names();
|
||||
if (names.find(name) != names.end()) {
|
||||
port = node->output(0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
namespace InferenceEngine {
|
||||
|
||||
#define INFER_REQ_CALL_STATEMENT(...) \
|
||||
@ -48,18 +30,6 @@ namespace InferenceEngine {
|
||||
::InferenceEngine::details::Rethrow(); \
|
||||
}
|
||||
|
||||
#define OV_INFER_REQ_CALL_STATEMENT(...) \
|
||||
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch (const ::InferenceEngine::RequestBusy& ex) { \
|
||||
throw ov::Busy(ex.what()); \
|
||||
} catch (const std::exception& ex) { \
|
||||
throw ov::Exception(ex.what()); \
|
||||
} catch (...) { \
|
||||
OPENVINO_ASSERT(false, "Unexpected exception"); \
|
||||
}
|
||||
|
||||
InferRequest::~InferRequest() {
|
||||
_impl = {};
|
||||
}
|
||||
@ -237,301 +207,3 @@ bool InferRequest::operator==(const InferRequest& r) const noexcept {
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
||||
namespace {
|
||||
|
||||
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
|
||||
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
|
||||
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
|
||||
p = node->input_value(0);
|
||||
}
|
||||
return ov::op::util::create_ie_output_name(p);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace ov {
|
||||
|
||||
InferRequest::~InferRequest() {
|
||||
_impl = {};
|
||||
}
|
||||
|
||||
InferRequest::InferRequest(const ie::IInferRequestInternal::Ptr& impl, const std::shared_ptr<void>& so)
|
||||
: _impl{impl},
|
||||
_so{so} {
|
||||
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
|
||||
}
|
||||
|
||||
void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlob(get_legacy_name_from_port(port), tensor._impl); });
|
||||
}
|
||||
|
||||
void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
|
||||
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
|
||||
}
|
||||
|
||||
void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
ov::Output<const ov::Node> port;
|
||||
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
|
||||
"Port for tensor name " + name + " was not found.");
|
||||
set_tensor(port, tensor);
|
||||
});
|
||||
}
|
||||
|
||||
void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
ov::Output<const ov::Node> port;
|
||||
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs()}),
|
||||
"set_tensors error. Input port for tensor name ",
|
||||
name,
|
||||
" was not found.");
|
||||
set_tensors(port, tensors);
|
||||
})
|
||||
}
|
||||
|
||||
void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
|
||||
auto impls = std::vector<InferenceEngine::Blob::Ptr>();
|
||||
std::transform(tensors.begin(), tensors.end(), std::back_inserter(impls), [](const Tensor& item) {
|
||||
return item._impl;
|
||||
});
|
||||
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlobs(get_legacy_name_from_port(port), impls); })
|
||||
}
|
||||
|
||||
void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto& inputs = _impl->GetInputs();
|
||||
OPENVINO_ASSERT(inputs.size() > idx,
|
||||
"Input port for index ",
|
||||
idx,
|
||||
" was not found! The model has only ",
|
||||
inputs.size(),
|
||||
" inputs.");
|
||||
set_tensor(inputs.at(idx)->output(0), tensor);
|
||||
});
|
||||
}
|
||||
|
||||
void InferRequest::set_input_tensor(const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto inputs = _impl->GetInputs();
|
||||
OPENVINO_ASSERT(inputs.size() == 1,
|
||||
"set_input_tensor() must be called on a function with exactly one parameter.");
|
||||
set_tensor(inputs.at(0)->output(0), tensor);
|
||||
});
|
||||
}
|
||||
|
||||
void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
OPENVINO_ASSERT(idx < _impl->GetInputs().size(),
|
||||
"set_input_tensors error. Input port for index ",
|
||||
idx,
|
||||
" is out of bounds. Model has only ",
|
||||
_impl->GetInputs().size(),
|
||||
" inputs");
|
||||
set_tensors(_impl->GetInputs().at(idx)->output(0), tensors);
|
||||
})
|
||||
}
|
||||
|
||||
void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
OPENVINO_ASSERT(_impl->GetInputs().size() == 1,
|
||||
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
|
||||
_impl->GetInputs().size(),
|
||||
" inputs");
|
||||
set_tensors(_impl->GetInputs().at(0)->output(0), tensors);
|
||||
})
|
||||
}
|
||||
|
||||
void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto& outputs = _impl->GetOutputs();
|
||||
OPENVINO_ASSERT(outputs.size() > idx,
|
||||
"Output port for index ",
|
||||
idx,
|
||||
" was not found! The model has only ",
|
||||
outputs.size(),
|
||||
" outputs.");
|
||||
set_tensor(outputs.at(idx)->output(0), tensor);
|
||||
});
|
||||
}
|
||||
|
||||
void InferRequest::set_output_tensor(const Tensor& tensor) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto outputs = _impl->GetOutputs();
|
||||
OPENVINO_ASSERT(outputs.size() == 1,
|
||||
"set_output_tensor() must be called on a function with exactly one parameter.");
|
||||
set_tensor(outputs.at(0)->output(0), tensor);
|
||||
});
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
|
||||
std::vector<std::shared_ptr<void>> soVec;
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto& name = get_legacy_name_from_port(port);
|
||||
OPENVINO_ASSERT(!_impl->GetBlobs(name),
|
||||
"get_tensor shall not be used together with batched "
|
||||
"set_tensors/set_input_tensors for name '",
|
||||
name,
|
||||
"'");
|
||||
auto blob = _impl->GetBlob(name);
|
||||
soVec = {_so, _impl->getPointerToSo()};
|
||||
Tensor tensor = {blob, soVec};
|
||||
return tensor;
|
||||
});
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
|
||||
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_tensor(const std::string& name) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
ov::Output<const ov::Node> port;
|
||||
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
|
||||
"Port for tensor name " + name + " was not found.");
|
||||
return get_tensor(port);
|
||||
});
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_input_tensor(size_t idx) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetInputs().at(idx)->output(0)); });
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_output_tensor(size_t idx) {
|
||||
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetOutputs().at(idx)->output(0)); });
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_input_tensor() {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto inputs = _impl->GetInputs();
|
||||
if (inputs.size() != 1) {
|
||||
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
|
||||
}
|
||||
return get_tensor(inputs.at(0)->output(0));
|
||||
});
|
||||
}
|
||||
|
||||
Tensor InferRequest::get_output_tensor() {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
const auto outputs = _impl->GetOutputs();
|
||||
if (outputs.size() != 1) {
|
||||
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
|
||||
}
|
||||
return get_tensor(outputs.at(0)->output(0));
|
||||
});
|
||||
}
|
||||
|
||||
void InferRequest::infer() {
|
||||
OV_INFER_REQ_CALL_STATEMENT(_impl->Infer();)
|
||||
}
|
||||
|
||||
void InferRequest::cancel() {
|
||||
OV_INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
|
||||
}
|
||||
|
||||
std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
auto ieInfos = _impl->GetPerformanceCounts();
|
||||
std::vector<ProfilingInfo> infos;
|
||||
infos.reserve(ieInfos.size());
|
||||
while (!ieInfos.empty()) {
|
||||
auto itIeInfo = std::min_element(
|
||||
std::begin(ieInfos),
|
||||
std::end(ieInfos),
|
||||
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
|
||||
return lhs.second.execution_index < rhs.second.execution_index;
|
||||
});
|
||||
IE_ASSERT(itIeInfo != ieInfos.end());
|
||||
auto& ieInfo = itIeInfo->second;
|
||||
infos.push_back(ProfilingInfo{});
|
||||
auto& info = infos.back();
|
||||
switch (ieInfo.status) {
|
||||
case ie::InferenceEngineProfileInfo::NOT_RUN:
|
||||
info.status = ProfilingInfo::Status::NOT_RUN;
|
||||
break;
|
||||
case ie::InferenceEngineProfileInfo::OPTIMIZED_OUT:
|
||||
info.status = ProfilingInfo::Status::OPTIMIZED_OUT;
|
||||
break;
|
||||
case ie::InferenceEngineProfileInfo::EXECUTED:
|
||||
info.status = ProfilingInfo::Status::EXECUTED;
|
||||
break;
|
||||
}
|
||||
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
|
||||
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
|
||||
info.node_name = itIeInfo->first;
|
||||
info.exec_type = std::string{ieInfo.exec_type};
|
||||
info.node_type = std::string{ieInfo.layer_type};
|
||||
ieInfos.erase(itIeInfo);
|
||||
}
|
||||
return infos;
|
||||
})
|
||||
}
|
||||
|
||||
void InferRequest::start_async() {
|
||||
OV_INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
|
||||
}
|
||||
|
||||
void InferRequest::wait() {
|
||||
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
|
||||
try {
|
||||
_impl->Wait(ie::InferRequest::RESULT_READY);
|
||||
} catch (const ie::InferCancelled& e) {
|
||||
throw Cancelled{e.what()};
|
||||
} catch (const std::exception& ex) {
|
||||
throw Exception(ex.what());
|
||||
} catch (...) {
|
||||
OPENVINO_UNREACHABLE("Unexpected exception");
|
||||
}
|
||||
}
|
||||
|
||||
bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
|
||||
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
|
||||
try {
|
||||
return _impl->Wait(timeout.count()) == ie::OK;
|
||||
} catch (const ie::InferCancelled& e) {
|
||||
throw Cancelled{e.what()};
|
||||
} catch (const std::exception& ex) {
|
||||
throw Exception(ex.what());
|
||||
} catch (...) {
|
||||
OPENVINO_UNREACHABLE("Unexpected exception");
|
||||
}
|
||||
}
|
||||
|
||||
void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
|
||||
OV_INFER_REQ_CALL_STATEMENT(_impl->SetCallback(std::move(callback));)
|
||||
}
|
||||
|
||||
std::vector<VariableState> InferRequest::query_state() {
|
||||
std::vector<VariableState> variable_states;
|
||||
std::vector<std::shared_ptr<void>> soVec;
|
||||
OV_INFER_REQ_CALL_STATEMENT({
|
||||
soVec = {_so, _impl->getPointerToSo()};
|
||||
for (auto&& state : _impl->QueryState()) {
|
||||
variable_states.emplace_back(VariableState{state, soVec});
|
||||
}
|
||||
})
|
||||
return variable_states;
|
||||
}
|
||||
|
||||
CompiledModel InferRequest::get_compiled_model() {
|
||||
OV_INFER_REQ_CALL_STATEMENT(
|
||||
return {ov::legacy_convert::convert_compiled_model(_impl->getPointerToExecutableNetworkInternal()), _so});
|
||||
}
|
||||
|
||||
bool InferRequest::operator!() const noexcept {
|
||||
return !_impl;
|
||||
}
|
||||
|
||||
InferRequest::operator bool() const noexcept {
|
||||
return (!!_impl);
|
||||
}
|
||||
|
||||
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
|
||||
return !(r == *this);
|
||||
}
|
||||
|
||||
bool InferRequest::operator==(const InferRequest& r) const noexcept {
|
||||
return r._impl == _impl;
|
||||
}
|
||||
|
||||
} // namespace ov
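For orientation, a usage sketch of the public ov::InferRequest API implemented above; "model.xml" and the "CPU" device are placeholders, and a single-input, single-output model with static shapes is assumed.

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");
    ov::InferRequest request = compiled.create_infer_request();

    ov::Tensor input(compiled.input().get_element_type(), compiled.input().get_shape());
    request.set_input_tensor(input);

    request.start_async();  // asynchronous path; request.infer() is the synchronous one
    request.wait();         // maps to Wait(RESULT_READY) on the wrapped implementation

    ov::Tensor result = request.get_output_tensor();
    return 0;
}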
|
||||
|
@ -4,36 +4,48 @@
|
||||
|
||||
#include "converter_utils.hpp"
|
||||
|
||||
#include <ie_blob.h>
|
||||
#include <ie_common.h>
|
||||
#include <ie_compound_blob.h>
|
||||
#include <ie_layouts.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <ie_input_info.hpp>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include <ie_version.hpp>
|
||||
#include <memory>
|
||||
#include <openvino/core/except.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <openvino/runtime/exception.hpp>
|
||||
#include <openvino/runtime/remote_context.hpp>
|
||||
#include <openvino/runtime/tensor.hpp>
|
||||
#include <mutex>
|
||||
|
||||
#include "any_copy.hpp"
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
|
||||
#include "icompiled_model_wrapper.hpp"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_compound_blob.h"
|
||||
#include "ie_icore.hpp"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_layouts.h"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_version.hpp"
|
||||
#include "iplugin_wrapper.hpp"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/op/parameter.hpp"
|
||||
#include "openvino/runtime/exception.hpp"
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/iinfer_request.hpp"
|
||||
#include "openvino/runtime/iplugin.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
|
||||
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
|
||||
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
|
||||
p = node->input_value(0);
|
||||
}
|
||||
return ov::op::util::create_ie_output_name(p);
|
||||
}
|
||||
|
||||
void fill_input_info(ov::Output<ov::Node>& input, InferenceEngine::InputInfo::Ptr& input_info) {
|
||||
const ov::Output<const ov::Node> const_input(input.get_node(), input.get_index());
|
||||
ov::legacy_convert::fill_input_info(const_input, input_info);
|
||||
@ -341,7 +353,9 @@ public:
|
||||
}
|
||||
|
||||
std::shared_ptr<InferenceEngine::IInferRequestInternal> CreateInferRequest() override {
|
||||
return m_model->create_infer_request();
|
||||
auto infer_request = legacy_convert::convert_infer_request(m_model->create_infer_request());
|
||||
infer_request->setPointerToExecutableNetworkInternal(shared_from_this());
|
||||
return infer_request;
|
||||
}
|
||||
|
||||
void Export(std::ostream& model) override {
|
||||
@ -397,3 +411,312 @@ std::shared_ptr<ov::ICompiledModel> ov::legacy_convert::convert_compiled_model(
|
||||
}
|
||||
return std::make_shared<InferenceEngine::ICompiledModelWrapper>(model);
|
||||
}
|
||||
|
||||
namespace ov {
|
||||
|
||||
class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestInternal {
|
||||
ov::Output<const ov::Node> find_port(const std::string& legacy_name) const {
|
||||
for (const auto& port : m_request->get_inputs()) {
|
||||
if (get_legacy_name_from_port(port) == legacy_name)
|
||||
return port;
|
||||
}
|
||||
for (const auto& port : m_request->get_outputs()) {
|
||||
if (get_legacy_name_from_port(port) == legacy_name)
|
||||
return port;
|
||||
}
|
||||
OPENVINO_ASSERT(false, "Cannot find port with name: ", legacy_name);
|
||||
}
|
||||
|
||||
public:
|
||||
explicit IInferRequestInternalWrapper(const std::shared_ptr<ov::IAsyncInferRequest>& request)
|
||||
: m_request(request) {}
|
||||
|
||||
void Infer() override {
|
||||
m_request->infer();
|
||||
}
|
||||
|
||||
void Cancel() override {
|
||||
m_request->cancel();
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override {
|
||||
auto res = m_request->get_profiling_info();
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> ret;
|
||||
for (const auto& info : res) {
|
||||
InferenceEngine::InferenceEngineProfileInfo old_info;
|
||||
old_info.cpu_uSec = info.cpu_time.count();
|
||||
old_info.realTime_uSec = info.real_time.count();
|
||||
strncpy(old_info.exec_type, info.exec_type.c_str(), sizeof(old_info.exec_type));
|
||||
old_info.exec_type[sizeof(old_info.exec_type) - 1] = 0;
|
||||
strncpy(old_info.layer_type, info.node_type.c_str(), sizeof(old_info.layer_type));
|
||||
old_info.layer_type[sizeof(old_info.layer_type) - 1] = 0;
|
||||
switch (info.status) {
|
||||
case ov::ProfilingInfo::Status::EXECUTED:
|
||||
old_info.status = InferenceEngine::InferenceEngineProfileInfo::EXECUTED;
|
||||
break;
|
||||
case ov::ProfilingInfo::Status::NOT_RUN:
|
||||
old_info.status = InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
|
||||
break;
|
||||
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
|
||||
old_info.status = InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT;
|
||||
break;
|
||||
}
|
||||
ret[info.node_name] = old_info;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override {
|
||||
m_request->set_tensor(find_port(name), ov::Tensor{data, {}});
|
||||
}
|
||||
|
||||
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr>& blobs) override {
|
||||
std::vector<ov::Tensor> tensors;
|
||||
for (const auto& blob : blobs) {
|
||||
tensors.emplace_back(ov::Tensor{blob, {}});
|
||||
}
|
||||
m_request->set_tensors(find_port(name), tensors);
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override {
|
||||
return m_request->get_tensor(find_port(name))._impl;
|
||||
}
|
||||
|
||||
InferenceEngine::BatchedBlob::Ptr GetBlobs(const std::string& name) override {
|
||||
auto tensors = m_request->get_tensors(find_port(name));
|
||||
std::vector<InferenceEngine::Blob::Ptr> blobs;
|
||||
for (const auto& tensor : tensors) {
|
||||
blobs.emplace_back(tensor._impl);
|
||||
}
|
||||
return std::make_shared<InferenceEngine::BatchedBlob>(blobs);
|
||||
}
|
||||
|
||||
void SetBlob(const std::string& name,
|
||||
const InferenceEngine::Blob::Ptr& data,
|
||||
const InferenceEngine::PreProcessInfo& info) override {
|
||||
OPENVINO_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
const InferenceEngine::PreProcessInfo& GetPreProcess(const std::string& name) const override {
|
||||
OPENVINO_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
void SetBatch(int batch) override {
|
||||
OPENVINO_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override {
|
||||
auto res = m_request->query_state();
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret;
|
||||
for (const auto& state : res) {
|
||||
ret.emplace_back(state._impl);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void StartAsync() override {
|
||||
m_request->start_async();
|
||||
}
|
||||
|
||||
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override {
|
||||
if (millis_timeout == InferenceEngine::IInferRequest::RESULT_READY) {
|
||||
m_request->wait();
|
||||
} else {
|
||||
std::chrono::milliseconds timeout(millis_timeout);
|
||||
bool res = m_request->wait_for(timeout);
|
||||
if (!res)
|
||||
return InferenceEngine::StatusCode::RESULT_NOT_READY;
|
||||
}
|
||||
return InferenceEngine::StatusCode::OK;
|
||||
}
|
||||
|
||||
void SetCallback(std::function<void(std::exception_ptr)> callback) override {
|
||||
m_request->set_callback(std::move(callback));
|
||||
}
|
||||
|
||||
std::shared_ptr<ov::IAsyncInferRequest> get_infer_request() {
|
||||
return m_request;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<ov::IAsyncInferRequest> m_request;
|
||||
};
|
||||
|
||||
} // namespace ov
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
class IAsyncInferRequestWrapper : public ov::IAsyncInferRequest {
|
||||
public:
|
||||
IAsyncInferRequestWrapper(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& request)
|
||||
: ov::IAsyncInferRequest(nullptr, nullptr, nullptr),
|
||||
m_request(request) {
|
||||
if (m_request->getPointerToExecutableNetworkInternal())
|
||||
m_compiled_model =
|
||||
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
|
||||
}
|
||||
std::shared_ptr<InferenceEngine::IInferRequestInternal> get_infer_request() {
|
||||
return m_request;
|
||||
}
|
||||
|
||||
void infer() override {
|
||||
m_request->Infer();
|
||||
}
|
||||
void start_async() override {
|
||||
m_request->StartAsync();
|
||||
}
|
||||
|
||||
void wait() override {
|
||||
try {
|
||||
m_request->Wait(InferenceEngine::InferRequest::RESULT_READY);
|
||||
} catch (const ov::Cancelled&) {
|
||||
throw;
|
||||
} catch (const InferenceEngine::InferCancelled& e) {
|
||||
throw ov::Cancelled{e.what()};
|
||||
} catch (const std::exception& ex) {
|
||||
throw ov::Exception(ex.what());
|
||||
} catch (...) {
|
||||
OPENVINO_UNREACHABLE("Unexpected exception");
|
||||
}
|
||||
}
|
||||
bool wait_for(const std::chrono::milliseconds& timeout) override {
|
||||
try {
|
||||
return m_request->Wait(timeout.count()) == InferenceEngine::OK;
|
||||
} catch (const InferenceEngine::InferCancelled& e) {
|
||||
throw ov::Cancelled{e.what()};
|
||||
} catch (const std::exception& ex) {
|
||||
throw Exception(ex.what());
|
||||
} catch (...) {
|
||||
OPENVINO_UNREACHABLE("Unexpected exception");
|
||||
}
|
||||
}
|
||||
|
||||
void cancel() override {
|
||||
m_request->Cancel();
|
||||
}
|
||||
|
||||
std::vector<ov::ProfilingInfo> get_profiling_info() const override {
|
||||
auto ieInfos = m_request->GetPerformanceCounts();
|
||||
std::vector<ov::ProfilingInfo> infos;
|
||||
infos.reserve(ieInfos.size());
|
||||
while (!ieInfos.empty()) {
|
||||
auto itIeInfo = std::min_element(
|
||||
std::begin(ieInfos),
|
||||
std::end(ieInfos),
|
||||
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
|
||||
return lhs.second.execution_index < rhs.second.execution_index;
|
||||
});
|
||||
IE_ASSERT(itIeInfo != ieInfos.end());
|
||||
auto& ieInfo = itIeInfo->second;
|
||||
infos.push_back(ov::ProfilingInfo{});
|
||||
auto& info = infos.back();
|
||||
switch (ieInfo.status) {
|
||||
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
|
||||
info.status = ov::ProfilingInfo::Status::NOT_RUN;
|
||||
break;
|
||||
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
|
||||
info.status = ov::ProfilingInfo::Status::OPTIMIZED_OUT;
|
||||
break;
|
||||
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
|
||||
info.status = ov::ProfilingInfo::Status::EXECUTED;
|
||||
break;
|
||||
}
|
||||
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
info.node_name = itIeInfo->first;
info.exec_type = std::string{ieInfo.exec_type};
info.node_type = std::string{ieInfo.layer_type};
ieInfos.erase(itIeInfo);
}
return infos;
}

ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override {
const auto& name = get_legacy_name_from_port(port);
OPENVINO_ASSERT(!m_request->GetBlobs(name),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for name '",
name,
"'");
auto blob = m_request->GetBlob(name);
ov::Tensor tensor = {blob, {m_request->getPointerToSo()}};
return tensor;
}
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override {
m_request->SetBlob(get_legacy_name_from_port(port), tensor._impl);
}

std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override {
auto blobs = m_request->GetBlobs(get_legacy_name_from_port(port));
std::vector<ov::Tensor> ret;
if (!blobs)
return ret;
for (size_t i = 0; i < blobs->size(); i++) {
ret.emplace_back(ov::Tensor{blobs->getBlob(i), {m_request->getPointerToSo()}});
}
return ret;
}
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override {
std::vector<InferenceEngine::Blob::Ptr> blobs;
for (const auto& tensor : tensors) {
blobs.emplace_back(tensor._impl);
}
m_request->SetBlobs(get_legacy_name_from_port(port), blobs);
}

std::vector<ov::VariableState> query_state() const override {
std::vector<ov::VariableState> variable_states;
std::vector<std::shared_ptr<void>> soVec;
soVec = {m_request->getPointerToSo()};
for (auto&& state : m_request->QueryState()) {
variable_states.emplace_back(ov::VariableState{state, soVec});
}
return variable_states;
}

void set_callback(std::function<void(std::exception_ptr)> callback) override {
m_request->SetCallback(std::move(callback));
}

const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override {
if (!m_compiled_model) {
std::lock_guard<std::mutex> lock(m_mutex);
if (!m_compiled_model) {
if (m_request->getPointerToExecutableNetworkInternal())
m_compiled_model =
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
}
}
OPENVINO_ASSERT(m_compiled_model);
return m_compiled_model;
}

const std::vector<ov::Output<const ov::Node>>& get_inputs() const override {
return get_compiled_model()->inputs();
}
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override {
return get_compiled_model()->outputs();
}

private:
std::shared_ptr<InferenceEngine::IInferRequestInternal> m_request;
mutable std::shared_ptr<ov::ICompiledModel> m_compiled_model;
mutable std::mutex m_mutex;
};

} // namespace InferenceEngine

std::shared_ptr<::InferenceEngine::IInferRequestInternal> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request) {
if (auto comp_model = std::dynamic_pointer_cast<InferenceEngine::IAsyncInferRequestWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<ov::IInferRequestInternalWrapper>(request);
}
std::shared_ptr<::ov::IAsyncInferRequest> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request) {
if (auto comp_model = std::dynamic_pointer_cast<ov::IInferRequestInternalWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<InferenceEngine::IAsyncInferRequestWrapper>(request);
}
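The two overloads above are deliberately symmetric: each one first checks whether the incoming request is already a wrapper around the other API and, if so, unwraps it instead of stacking wrappers. A minimal sketch of the intended round trip (the helper function is an illustrative assumption, not part of this change):

// Hypothetical helper: converting to the legacy interface and back yields the original object.
std::shared_ptr<ov::IAsyncInferRequest> round_trip(const std::shared_ptr<ov::IAsyncInferRequest>& request) {
    // Wraps into ov::IInferRequestInternalWrapper unless 'request' already wraps a legacy request.
    auto legacy = ov::legacy_convert::convert_infer_request(request);
    // The dynamic_pointer_cast in the overload above detects the wrapper and unwraps it again.
    return ov::legacy_convert::convert_infer_request(legacy);
}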
@ -5,8 +5,10 @@
#pragma once

#include "cpp/ie_cnn_network.h"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "openvino/core/model.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iplugin.hpp"

@ -22,11 +24,18 @@ std::shared_ptr<const ov::Model> convert_model(const InferenceEngine::CNNNetwork
std::shared_ptr<::InferenceEngine::IInferencePlugin> convert_plugin(const std::shared_ptr<::ov::IPlugin>& plugin);
std::shared_ptr<::ov::IPlugin> convert_plugin(const std::shared_ptr<::InferenceEngine::IInferencePlugin>& plugin);

std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
const std::shared_ptr<::ov::ICompiledModel>& model);
std::shared_ptr<::ov::ICompiledModel> convert_compiled_model(
const std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal>& model);

// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IInferRequestInternal> convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request);
OPENVINO_RUNTIME_API std::shared_ptr<::ov::IAsyncInferRequest> convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request);

} // namespace legacy_convert
} // namespace ov
266 src/inference/src/dev/iasync_infer_request.cpp Normal file
@ -0,0 +1,266 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/runtime/iasync_infer_request.hpp"

#include <memory>

#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"

namespace {

struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor {
explicit ImmediateStreamsExecutor(const InferenceEngine::IStreamsExecutor::Ptr& streamsExecutor)
: _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {
_streamsExecutor->Execute(std::move(task));
}
InferenceEngine::IStreamsExecutor::Ptr _streamsExecutor;
};

} // namespace

ov::IAsyncInferRequest::~IAsyncInferRequest() {
stop_and_wait();
}

ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
: m_sync_request(request),
m_request_executor(task_executor),
m_callback_executor(callback_executor) {
if (m_request_executor && m_sync_request)
m_pipeline = {{m_request_executor, [this] {
m_sync_request->infer();
}}};
if (m_sync_request)
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
m_sync_request->infer();
}}};
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
if (streams_executor != nullptr) {
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
m_sync_request->infer();
}}};
}
}

void ov::IAsyncInferRequest::wait() {
// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();

if (!future.valid()) {
return;
}

future.wait();
}

bool ov::IAsyncInferRequest::wait_for(const std::chrono::milliseconds& timeout) {
OPENVINO_ASSERT(timeout >= std::chrono::milliseconds{0}, "Timeout can't be less than 0 for InferRequest::wait().");
auto status = std::future_status::deferred;

// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();

if (!future.valid()) {
return false;
}

status = future.wait_for(std::chrono::milliseconds{timeout});

if (std::future_status::ready == status) {
future.get();
return true;
} else {
return false;
}
}

void ov::IAsyncInferRequest::cancel() {
std::lock_guard<std::mutex> lock{m_mutex};
if (m_state == InferState::BUSY) {
m_state = InferState::CANCELLED;
}
}

void ov::IAsyncInferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
check_state();
m_callback = std::move(callback);
}

std::vector<ov::VariableState> ov::IAsyncInferRequest::query_state() const {
check_state();
return m_sync_request->query_state();
}

void ov::IAsyncInferRequest::infer_thread_unsafe() {
run_first_stage(m_sync_pipeline.begin(), m_sync_pipeline.end(), m_sync_callback_executor);
}

void ov::IAsyncInferRequest::start_async_thread_unsafe() {
run_first_stage(m_pipeline.begin(), m_pipeline.end(), m_callback_executor);
}

void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
auto& firstStageExecutor = std::get<Stage_e::EXECUTOR>(*itBeginStage);
OPENVINO_ASSERT(nullptr != firstStageExecutor);
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
}

InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
return std::bind(
[this, itStage, itEndStage](InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) mutable {
std::exception_ptr currentException = nullptr;
auto& thisStage = *itStage;
auto itNextStage = itStage + 1;
try {
auto& stageTask = std::get<Stage_e::TASK>(thisStage);
OPENVINO_ASSERT(nullptr != stageTask);
stageTask();
if (itEndStage != itNextStage) {
auto& nextStage = *itNextStage;
auto& nextStageExecutor = std::get<Stage_e::EXECUTOR>(nextStage);
OPENVINO_ASSERT(nullptr != nextStageExecutor);
nextStageExecutor->run(make_next_stage_task(itNextStage, itEndStage, std::move(callbackExecutor)));
}
} catch (...) {
currentException = std::current_exception();
}

if ((itEndStage == itNextStage) || (nullptr != currentException)) {
auto lastStageTask = [this, currentException]() mutable {
auto promise = std::move(m_promise);
std::function<void(std::exception_ptr)> callback;
{
std::lock_guard<std::mutex> lock{m_mutex};
m_state = InferState::IDLE;
std::swap(callback, m_callback);
}
if (callback) {
try {
callback(currentException);
} catch (...) {
currentException = std::current_exception();
}
std::lock_guard<std::mutex> lock{m_mutex};
if (!m_callback) {
std::swap(callback, m_callback);
}
}
if (nullptr == currentException) {
promise.set_value();
} else {
promise.set_exception(currentException);
}
};

if (nullptr == callbackExecutor) {
lastStageTask();
} else {
callbackExecutor->run(std::move(lastStageTask));
}
}
},
std::move(callbackExecutor));
}

void ov::IAsyncInferRequest::start_async() {
infer_impl([&] {
start_async_thread_unsafe();
});
}

void ov::IAsyncInferRequest::check_state() const {
std::lock_guard<std::mutex> lock{m_mutex};
switch (m_state) {
case InferState::BUSY:
throw ov::Busy("Infer Request is busy");
case InferState::CANCELLED:
throw ov::Cancelled("Infer Request was canceled");
default:
break;
}
}

std::vector<ov::ProfilingInfo> ov::IAsyncInferRequest::get_profiling_info() const {
check_state();
return m_sync_request->get_profiling_info();
}

ov::Tensor ov::IAsyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensor(port);
}

void ov::IAsyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
check_state();
return m_sync_request->set_tensor(port, tensor);
}

std::vector<ov::Tensor> ov::IAsyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensors(port);
}

void ov::IAsyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
check_state();
return m_sync_request->set_tensors(port, tensors);
}

void ov::IAsyncInferRequest::stop_and_wait() {
Futures futures;
InferState state = InferState::IDLE;
{
std::lock_guard<std::mutex> lock{m_mutex};
state = m_state;
if (state != InferState::STOP) {
m_callback = {};
m_state = InferState::STOP;
futures = std::move(m_futures);
}
}
if (state != InferState::STOP) {
for (auto&& future : futures) {
if (future.valid()) {
future.wait();
}
}
}
}

void ov::IAsyncInferRequest::infer() {
m_sync_request->infer();
}

void ov::IAsyncInferRequest::check_tensors() const {
m_sync_request->check_tensors();
}

const std::shared_ptr<ov::ICompiledModel>& ov::IAsyncInferRequest::get_compiled_model() const {
return m_sync_request->get_compiled_model();
}

const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_inputs() const {
return m_sync_request->get_inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_outputs() const {
return m_sync_request->get_outputs();
}
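The constructor above installs a single-stage pipeline that simply runs the synchronous request on the task executor. A plugin that needs more stages (for example, a separate device-wait step) overwrites m_pipeline in its own constructor, as the class documentation suggests. A hedged sketch, assuming a hypothetical MyAsyncRequest type and a wait executor supplied by the plugin:

// Illustrative derived request with a two-stage pipeline: submit, then wait for the device.
class MyAsyncRequest : public ov::IAsyncInferRequest {
public:
    MyAsyncRequest(const std::shared_ptr<ov::IInferRequest>& sync_request,
                   const InferenceEngine::ITaskExecutor::Ptr& task_executor,
                   const InferenceEngine::ITaskExecutor::Ptr& wait_executor,
                   const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
        : ov::IAsyncInferRequest(sync_request, task_executor, callback_executor) {
        // Each stage is an {executor, task} pair; make_next_stage_task() chains them in order.
        m_pipeline = {
            {task_executor, [this] { /* submit the work to the device */ }},
            {wait_executor, [this] { /* wait for the device and fetch results */ }},
        };
    }
    ~MyAsyncRequest() {
        stop_and_wait();  // synchronize the derived context with running stages, as required
    }
};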
@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

@ -78,7 +78,8 @@ const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::outputs() con
const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::inputs() const {
return m_inputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> ov::ICompiledModel::create_infer_request() const {

std::shared_ptr<ov::IAsyncInferRequest> ov::ICompiledModel::create_infer_request() const {
return create_async_infer_request();
}
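After this change create_infer_request() is just a thin forwarder, so a 2.0 plugin only has to implement the two factory methods. A hedged sketch with hypothetical MyCompiledModel, MySyncRequest and MyAsyncRequest types; the executor getters are assumed to be the accessors ICompiledModel already exposes to derived classes:

// Illustrative plugin-side factories matching the new ICompiledModel contract.
class MyCompiledModel : public ov::ICompiledModel {
protected:
    std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override {
        return std::make_shared<MySyncRequest>(
            std::static_pointer_cast<const MyCompiledModel>(shared_from_this()));
    }

    std::shared_ptr<ov::IAsyncInferRequest> create_async_infer_request() const override {
        // create_infer_request() in the base class forwards here.
        return std::make_shared<MyAsyncRequest>(create_sync_infer_request(),
                                                get_task_executor(),
                                                get_callback_executor());
    }
};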
@ -6,6 +6,8 @@

#include <ie_plugin_config.hpp>

#include "dev/converter_utils.hpp"

InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model)
: ov::ICompiledModel(nullptr, ov::legacy_convert::convert_plugin(model->_plugin)),
@ -20,9 +22,9 @@ InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
m_inputs = inputs;
m_outputs = outputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> InferenceEngine::ICompiledModelWrapper::create_infer_request()
const {
return m_model->CreateInferRequest();

std::shared_ptr<ov::IAsyncInferRequest> InferenceEngine::ICompiledModelWrapper::create_infer_request() const {
return ov::legacy_convert::convert_infer_request(m_model->CreateInferRequest());
}

void InferenceEngine::ICompiledModelWrapper::export_model(std::ostream& model) const {

@ -13,7 +13,7 @@ namespace InferenceEngine {
class ICompiledModelWrapper : public ov::ICompiledModel {
public:
ICompiledModelWrapper(const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model);
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;

void export_model(std::ostream& model) const override;

@ -30,7 +30,7 @@ public:
private:
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> m_model;

std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override {
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override {
OPENVINO_NOT_IMPLEMENTED;
}
};
238 src/inference/src/dev/isync_infer_request.cpp Normal file
@ -0,0 +1,238 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/runtime/isync_infer_request.hpp"

#include "cpp_interfaces/plugin_itt.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/layout.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"

namespace {

void check_batched_tensors(const ov::Output<const ov::Node>& input, const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT(!tensors.empty(), "set_input_tensors/set_tensors can't be called with empty tensors");
OPENVINO_ASSERT(
tensors.size() != 1,
"Internal error (plugin): check_batched_tensors is not allowed to have only one tensor inside batch");

auto layout = ov::layout::get_layout(input);
OPENVINO_ASSERT(ov::layout::has_batch(layout),
"set_input_tensors/set_tensors can be used only for inputs with N(batch) dimension"
" 'layout' defined. Current layout is ",
layout.to_string());
auto batch_idx = ov::layout::batch_idx(layout);
if (batch_idx < 0) {
// TODO: Do we need this logic?
batch_idx += static_cast<int64_t>(tensors[0].get_shape().size());
}
OPENVINO_ASSERT(batch_idx == 0,
"set_input_tensors/set_tensors is not currently supported for batch dimension index ",
batch_idx,
" != 0");
std::for_each(tensors.begin(), tensors.end(), [&batch_idx](const ov::Tensor& item) {
OPENVINO_ASSERT(item.get_shape()[batch_idx] == 1,
"set_input_tensors/set_tensors. Tensors shall represent one item in a batch, ",
item.get_shape()[batch_idx],
" provided");
});
auto tensors_size = static_cast<int>(tensors.size());
if (input.get_partial_shape().rank().is_static()) {
OPENVINO_ASSERT(batch_idx >= 0 && batch_idx < input.get_partial_shape().rank().get_length(),
"set_input_tensors/set_tensors error. Layout ",
layout.to_string(),
" is incorrect for operation with shape ",
input.get_partial_shape());
auto batch = input.get_partial_shape()[batch_idx];

OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
"set_input_tensors/set_tensors error. Input shape ",
input.get_partial_shape(),
"batch ",
batch,
"doesn't match with total blobs count: ",
tensors_size);
}

// In future consider checking if blobs point to contiguous range of memory and use single 'SetBlob' instead
auto batched_shape = tensors[0].get_shape();
auto element_type = tensors[0].get_element_type();
batched_shape[batch_idx] = tensors_size;
for (const auto& item : tensors) {
auto item_shape = item.get_shape();
item_shape[batch_idx] = batched_shape[batch_idx];
OPENVINO_ASSERT(item_shape == batched_shape && item.get_element_type() == element_type,
"set_input_tensors/set_tensors error. Tensor with element type ",
item.get_element_type(),
" and shape ",
item_shape,
" is not compatible with batched tensor with element type ",
element_type,
" and shape ",
batched_shape);
}
}

} // namespace

ov::IInferRequest::~IInferRequest() = default;

ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
: m_compiled_model(compiled_model) {}

const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_inputs() const {
return m_compiled_model->inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_outputs() const {
return m_compiled_model->outputs();
}
const std::shared_ptr<ov::ICompiledModel>& ov::ISyncInferRequest::get_compiled_model() const {
return m_compiled_model;
}

ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Output<const ov::Node>& port) const {
ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT;
for (const auto& ports : {get_inputs(), get_outputs()}) {
for (size_t i = 0; i < ports.size(); i++) {
if (ports[i] == port) {
return {i, type};
}
}
type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT;
}
return {0, ov::ISyncInferRequest::FoundPort::Type::NOT_FOUND};
}

void ov::ISyncInferRequest::convert_batched_tensors() {
for (const auto& item : m_batched_tensors) {
auto tmp_shape = item.second.at(0).get_shape();
auto tmp_et = item.second.at(0).get_element_type();
tmp_shape[0] = item.second.size();
ov::RemoteContext remote_context;
ov::Tensor input_tensor;
try {
auto net = get_compiled_model();
if (net) {
remote_context = net->get_context();
}
} catch (const ov::NotImplemented&) {
}
if (remote_context._impl) {
input_tensor = remote_context.create_host_tensor(tmp_et, tmp_shape);
} else {
input_tensor = ov::Tensor(tmp_et, tmp_shape);
}
auto ptr = input_tensor.data<uint8_t>();

// Perform memory copy
ov::parallel_for(input_tensor.get_size(), [&](size_t i) {
const auto& tensor = item.second.at(i);
memcpy(ptr + i * tensor.get_byte_size(), tensor.data<uint8_t>(), tensor.get_byte_size());
});
set_tensor(get_inputs()[item.first], input_tensor);
}
}

ov::Tensor ov::ISyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
if (found_port.is_input()) {
auto input = m_compiled_model->inputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (input.get_partial_shape().is_dynamic())
return m_input_tensors.at(found_port.idx);
}

auto output = m_compiled_model->outputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (output.get_partial_shape().is_dynamic())
return m_output_tensors.at(found_port.idx);
}

void ov::ISyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
OPENVINO_ASSERT(
port.get_element_type() == tensor.get_element_type(),
"Failed to set output tensor, the tensor element type is not corresponding with output element type");
OPENVINO_ASSERT(port.get_partial_shape().is_dynamic() || tensor.get_shape() == port.get_shape(),
"Input tensor size is not equal with model input size (",
tensor.get_shape(),
" != ",
port.get_shape(),
").");
if (found_port.is_input()) {
m_input_tensors.at(found_port.idx) = tensor;
m_batched_tensors.erase(found_port.idx);
} else {
m_output_tensors.at(found_port.idx) = tensor;
}
}

std::vector<ov::Tensor> ov::ISyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (m_batched_tensors.count(found_port.idx))
return m_batched_tensors.at(found_port.idx);
return {};
}

void ov::ISyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (tensors.size() == 1) {
set_tensor(port, tensors[0]);
return;
}

check_batched_tensors(port, tensors);
set_tensors_impl(port, tensors);
}

void ov::ISyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT_HELPER(::ov::NotImplemented,
"",
false,
"Not Implemented",
"set_input_tensors/set_tensors are not supported by this plugin");
}

void ov::ISyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const {
bool is_input = ov::op::util::is_parameter(port.get_node());
std::string tensor_type = is_input ? "input" : "output";

bool is_dynamic = port.get_partial_shape().is_dynamic();
OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor.get_shape(),
"The ",
tensor_type,
" tensor size is not equal to the model ",
tensor_type,
" type: got ",
tensor.get_size(),
" expecting ",
port.get_shape(),
".");
}

void ov::ISyncInferRequest::check_tensors() const {
const auto& inputs = m_compiled_model->inputs();
for (size_t i = 0; i < inputs.size(); i++) {
check_tensor(inputs[i], m_input_tensors[i]);
}
const auto& outputs = m_compiled_model->outputs();
for (size_t i = 0; i < outputs.size(); i++) {
check_tensor(outputs[i], m_output_tensors[i]);
}
}
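Seen from the application, the checks above boil down to a simple contract: the input needs a layout with the batch ('N') dimension at index 0, and every tensor passed in must hold exactly one batch item with a matching element type and shape. A usage sketch under those assumptions (the compiled_model variable and the 1x3x224x224 shape are illustrative):

// Assumed: an input with layout "NCHW" and shape {4, 3, 224, 224}.
ov::InferRequest infer_request = compiled_model.create_infer_request();

std::vector<ov::Tensor> batch;
for (size_t i = 0; i < 4; ++i) {
    // One tensor per batch item; the N dimension of each tensor must be 1.
    batch.emplace_back(ov::element::f32, ov::Shape{1, 3, 224, 224});
}
// A single tensor would fall back to set_tensor(); several go through
// check_batched_tensors() and then the plugin's set_tensors_impl().
infer_request.set_input_tensors(0, batch);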
295 src/inference/src/infer_request.cpp Normal file
@ -0,0 +1,295 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/runtime/infer_request.hpp"

#include <map>
#include <memory>
#include <string>

#include "ie_common.h"
#include "openvino/core/node.hpp"
#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "transformations/utils/utils.hpp"

#define OV_INFER_REQ_CALL_STATEMENT(...) \
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
try { \
__VA_ARGS__; \
} catch (const ::InferenceEngine::RequestBusy& ex) { \
throw ov::Busy(ex.what()); \
} catch (const std::exception& ex) { \
throw ov::Exception(ex.what()); \
} catch (...) { \
OPENVINO_ASSERT(false, "Unexpected exception"); \
}

namespace {

inline bool getPort(ov::Output<const ov::Node>& res_port,
const std::string& name,
const std::vector<std::vector<ov::Output<const ov::Node>>>& vector_ports) {
for (const auto& ports : vector_ports) {
for (const auto& port : ports) {
const auto& names = port.get_names();
if (names.find(name) != names.end()) {
res_port = port;
return true;
}
}
}
return false;
}

} // namespace

namespace ov {

InferRequest::~InferRequest() {
_impl = {};
}

InferRequest::InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so)
: _impl{impl},
_so{so} {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
}

void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensor(port, tensor); });
}

void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
}

void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
set_tensor(port, tensor);
});
}

void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs()}),
"set_tensors error. Input port for tensor name ",
name,
" was not found.");
set_tensors(port, tensors);
})
}

void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensors(port, tensors); })
}

void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() > idx,
"Input port for index ",
idx,
" was not found! The model has only ",
inputs.size(),
" inputs.");
set_tensor(inputs.at(idx), tensor);
});
}

void InferRequest::set_input_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() == 1,
"set_input_tensor() must be called on a function with exactly one parameter.");
set_tensor(inputs.at(0), tensor);
});
}

void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(idx < _impl->get_inputs().size(),
"set_input_tensors error. Input port for index ",
idx,
" is out of bounds. Model has only ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(idx), tensors);
})
}

void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_inputs().size() == 1,
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(0), tensors);
})
}

void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() > idx,
"Output port for index ",
idx,
" was not found! The model has only ",
outputs.size(),
" outputs.");
set_tensor(outputs.at(idx), tensor);
});
}

void InferRequest::set_output_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() == 1,
"set_output_tensor() must be called on a function with exactly one parameter.");
set_tensor(outputs.at(0), tensor);
});
}

Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_tensors(port).empty(),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for port '",
port,
"'");
auto tensor = _impl->get_tensor(port);
tensor._so.emplace_back(_so);

return tensor;
});
}

Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
}

Tensor InferRequest::get_tensor(const std::string& name) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
return get_tensor(port);
});
}

Tensor InferRequest::get_input_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_inputs().at(idx)); });
}

Tensor InferRequest::get_output_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_outputs().at(idx)); });
}

Tensor InferRequest::get_input_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->get_inputs();
if (inputs.size() != 1) {
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(inputs.at(0));
});
}

Tensor InferRequest::get_output_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->get_outputs();
if (outputs.size() != 1) {
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(outputs.at(0));
});
}

void InferRequest::infer() {
OV_INFER_REQ_CALL_STATEMENT(_impl->infer());
}

void InferRequest::cancel() {
OV_INFER_REQ_CALL_STATEMENT(_impl->cancel());
}

std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
OV_INFER_REQ_CALL_STATEMENT(return _impl->get_profiling_info());
}

void InferRequest::start_async() {
OV_INFER_REQ_CALL_STATEMENT(_impl->start_async());
}

void InferRequest::wait() {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
_impl->wait();
} catch (const ov::Cancelled&) {
throw;
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}

bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
return _impl->wait_for(timeout);
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}

void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
OV_INFER_REQ_CALL_STATEMENT(_impl->set_callback(std::move(callback));)
}

std::vector<VariableState> InferRequest::query_state() {
std::vector<VariableState> variable_states;
OV_INFER_REQ_CALL_STATEMENT({
for (auto&& state : _impl->query_state()) {
auto soVec = state._so;
soVec.emplace_back(_so);
variable_states.emplace_back(ov::VariableState{state._impl, soVec});
}
})
return variable_states;
}

CompiledModel InferRequest::get_compiled_model() {
OV_INFER_REQ_CALL_STATEMENT(return {_impl->get_compiled_model(), _so});
}

bool InferRequest::operator!() const noexcept {
return !_impl;
}

InferRequest::operator bool() const noexcept {
return (!!_impl);
}

bool InferRequest::operator!=(const InferRequest& r) const noexcept {
return !(r == *this);
}

bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl;
}

} // namespace ov
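The public class above only adds shared-object bookkeeping and exception translation around IAsyncInferRequest, so a typical asynchronous flow stays short. A minimal sketch, assuming compiled_model is an already compiled ov::CompiledModel:

#include <chrono>
#include <exception>
#include <iostream>

#include "openvino/runtime/core.hpp"

void run_async(ov::CompiledModel& compiled_model) {
    ov::InferRequest request = compiled_model.create_infer_request();

    // The callback receives a stored exception pointer instead of having it thrown at the caller.
    request.set_callback([](std::exception_ptr ex) {
        if (ex) {
            try {
                std::rethrow_exception(ex);
            } catch (const std::exception& e) {
                std::cerr << "Inference failed: " << e.what() << '\n';
            }
        }
    });

    request.start_async();
    if (!request.wait_for(std::chrono::milliseconds(100))) {
        request.cancel();  // flips the state from BUSY to CANCELLED if the pipeline is still running
    }
    request.wait();  // blocks until the last pipeline stage has finished
}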
@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

@ -26,6 +26,7 @@ ov_mark_target_as_cc(${TARGET_NAME})

target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${OpenVINO_SOURCE_DIR}/src/inference/src/dev" # TODO: remove after migration to new infer request
"${TEMPLATE_PLUGIN_SOURCE_DIR}/include")

# link common Inference Engine libraries
@ -6,8 +6,12 @@

#include <memory>

#include "converter_utils.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "plugin.hpp"
#include "template/config.hpp"
#include "template_async_infer_request.hpp"
@ -124,17 +128,8 @@ void TemplatePlugin::CompiledModel::compile_model(const std::shared_ptr<ov::Mode
// ! [executable_network:map_graph]

// ! [executable_network:create_infer_request]
std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_infer_request() const {
auto internal_request = create_sync_infer_request();
return std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());
}

std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_sync_infer_request()
const {
std::shared_ptr<ov::IAsyncInferRequest> TemplatePlugin::CompiledModel::create_infer_request() const {
// auto internal_request = create_sync_infer_request();
std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
for (const auto& output : m_model->inputs()) {
_inputs.emplace_back(output.get_node_shared_ptr());
@ -143,10 +138,36 @@ std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::Compiled
_outputs.emplace_back(output.get_node_shared_ptr());
}

return std::make_shared<TemplateInferRequest>(
auto internal_request = std::make_shared<TemplateInferRequest>(
_inputs,
_outputs,
std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
auto async_infer_request = std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());

async_infer_request->setPointerToExecutableNetworkInternal(
ov::legacy_convert::convert_compiled_model(std::const_pointer_cast<ov::ICompiledModel>(shared_from_this())));

return ov::legacy_convert::convert_infer_request(async_infer_request);
}

std::shared_ptr<ov::ISyncInferRequest> TemplatePlugin::CompiledModel::create_sync_infer_request() const {
OPENVINO_NOT_IMPLEMENTED;
// std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
// for (const auto& output : m_model->inputs()) {
//     _inputs.emplace_back(output.get_node_shared_ptr());
// }
// for (const auto& output : outputs()) {
//     _outputs.emplace_back(output.get_node_shared_ptr());
// }
//
// return std::make_shared<TemplateInferRequest>(
//     _inputs,
//     _outputs,
//     std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
}
// ! [executable_network:create_infer_request]

@ -5,6 +5,9 @@
#pragma once

#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/tensor.hpp"
#include "template_config.hpp"
#include "template_infer_request.hpp"

@ -34,10 +37,10 @@ public:
virtual ov::Any get_property(const std::string& name) const override;

ov::RemoteContext get_context() const override;
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;

protected:
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override;
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;

private:
friend class TemplateInferRequest;