Added classes for Sync and Async infer requests (#15387)

* Added classes for Sync and Async infer requests

* Changed hierarchy of Infer requests

* Fixed code style

* Fixed some tests

* Fixed naming style

* Fixed template plugin build

* Temporary disable python tests

* Revert "Temporary disable python tests"

This reverts commit c9aa9d79f8.

* Fixed template plugin tests

* Disable python tests

* Disable more steps

* Merged CI

* Revert "Merged CI"

This reverts commit 2f69574870.

* Try to fix segfault in python tests

* Remove default constructor

* Fixed documentation

* Fixed CPU tests

* Fixed Windows build

* Fixed comments

* Fixed build
Ilya Churaev 2023-02-14 06:59:53 +04:00 committed by GitHub
parent 609dee0abc
commit b80d05e0e1
22 changed files with 1769 additions and 380 deletions

View File

@ -19,6 +19,7 @@
namespace InferenceEngine {
class Blob;
class IAsyncInferRequestWrapper;
} // namespace InferenceEngine
namespace ov {
@ -28,6 +29,7 @@ class CoreImpl;
class InferRequest;
class RemoteContext;
class VariableState;
class IInferRequestInternalWrapper;
/**
* @brief Tensor API holding host memory
@ -52,6 +54,8 @@ protected:
friend class ov::InferRequest;
friend class ov::RemoteContext;
friend class ov::VariableState;
friend class ov::IInferRequestInternalWrapper;
friend class InferenceEngine::IAsyncInferRequestWrapper;
public:
/// @brief Default constructor

View File

@ -0,0 +1,276 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime AsyncInferRequest interface
* @file openvino/runtime/iasync_infer_request.hpp
*/
#pragma once
#include <future>
#include <memory>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace ov {
/**
* @brief Base class with the default implementation of an asynchronous multi-staged inference request.
* To customize pipeline stages, a derived class should change the content
* of the IAsyncInferRequest::m_pipeline member container.
* It consists of pairs of an executor and the task to be run by that executor.
* The class is recommended to be used by plugins as a base class for asynchronous inference request
* implementations.
* @note To synchronize the derived context with the stages,
* a derived class should call IAsyncInferRequest::stop_and_wait() in its destructor.
* @par Example
* Here is an example of an asynchronous inference request implementation for an accelerator device.
* It uses 5 different executors to run different stages of the synchronous inference request.
*/
class OPENVINO_RUNTIME_API IAsyncInferRequest : public IInferRequest {
public:
IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
const InferenceEngine::ITaskExecutor::Ptr& callback_executor);
~IAsyncInferRequest();
/**
* @brief Starts inference of the specified input(s) in asynchronous mode
* @note The method returns immediately; inference also starts immediately.
*/
virtual void start_async();
/**
* @brief Waits for the result to become available.
*/
virtual void wait();
/**
* @brief Waits for the result to become available. Blocks until the specified timeout has elapsed or the result
* becomes available, whichever comes first.
* @param timeout Maximum duration, in milliseconds, to block for
* @return True if the result became available, false if the timeout elapsed
*/
virtual bool wait_for(const std::chrono::milliseconds& timeout);
/**
* @brief Cancel current inference request execution
*/
virtual void cancel();
/**
* @brief Sets a callback function that is called on success or failure of the asynchronous request
* @param callback Function to be called; it receives a std::exception_ptr which is null on success
*/
virtual void set_callback(std::function<void(std::exception_ptr)> callback);
/**
* @brief Infers the specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
void infer() override;
/**
* @brief Queries performance measures per layer to identify the most time consuming operation.
* @note Not all plugins provide meaningful data.
* @return Vector of profiling information for operations in a model.
*/
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
/**
* @brief Gets state control interface for the given infer request.
*
* State control is essential for recurrent models.
* @return Vector of Variable State objects.
*/
std::vector<ov::VariableState> query_state() const override;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
protected:
using Stage = std::pair<InferenceEngine::ITaskExecutor::Ptr, InferenceEngine::Task>;
/**
* @brief Pipeline is vector of stages
*/
using Pipeline = std::vector<Stage>;
/**
* @brief Forbids starting new pipelines and waits for all pipelines that have already started.
* @note Should be called in the derived class destructor to wait until the derived context captured by
* pipeline tasks is no longer in use
*/
void stop_and_wait();
/**
* @brief Throws an exception if the inference request is busy or cancelled
*/
void check_state() const;
/**
* @brief Performs inference of the pipeline in synchronous mode
* @note Used by infer(), which ensures thread safety and then calls this method
*/
virtual void infer_thread_unsafe();
/**
* @brief Starts the asynchronous pipeline (not thread-safe).
* @note Used by start_async(), which ensures thread safety and then calls this method
*/
virtual void start_async_thread_unsafe();
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
void check_tensors() const override;
Pipeline m_pipeline;       //!< Pipeline variable that should be filled by the derived class.
Pipeline m_sync_pipeline;  //!< Synchronous pipeline variable that should be filled by the derived class.
private:
enum InferState { IDLE, BUSY, CANCELLED, STOP };
using Futures = std::vector<std::shared_future<void>>;
enum Stage_e : std::uint8_t { EXECUTOR, TASK };
InferState m_state = InferState::IDLE;
Futures m_futures;
std::promise<void> m_promise;
friend struct DisableCallbackGuard;
struct DisableCallbackGuard {
explicit DisableCallbackGuard(IAsyncInferRequest* this_) : _this{this_} {
std::lock_guard<std::mutex> lock{_this->m_mutex};
std::swap(m_callback, _this->m_callback);
}
~DisableCallbackGuard() {
std::lock_guard<std::mutex> lock{_this->m_mutex};
_this->m_callback = m_callback;
}
IAsyncInferRequest* _this = nullptr;
std::function<void(std::exception_ptr)> m_callback;
};
void run_first_stage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor = {});
InferenceEngine::Task make_next_stage_task(const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor);
template <typename F>
void infer_impl(const F& f) {
check_tensors();
InferState state = InferState::IDLE;
{
std::lock_guard<std::mutex> lock{m_mutex};
state = m_state;
switch (m_state) {
case InferState::BUSY:
throw ov::Busy("Infer Request is busy");
case InferState::CANCELLED:
throw ov::Cancelled("Infer Request was canceled");
case InferState::IDLE: {
m_futures.erase(std::remove_if(std::begin(m_futures),
std::end(m_futures),
[](const std::shared_future<void>& future) {
if (future.valid()) {
return (std::future_status::ready ==
future.wait_for(std::chrono::milliseconds{0}));
} else {
return true;
}
}),
m_futures.end());
m_promise = {};
m_futures.emplace_back(m_promise.get_future().share());
} break;
case InferState::STOP:
break;
}
m_state = InferState::BUSY;
}
if (state != InferState::STOP) {
try {
f();
} catch (...) {
m_promise.set_exception(std::current_exception());
std::lock_guard<std::mutex> lock{m_mutex};
m_state = InferState::IDLE;
throw;
}
}
}
std::shared_ptr<IInferRequest> m_sync_request;
InferenceEngine::ITaskExecutor::Ptr m_request_executor; //!< Used to run inference CPU tasks.
InferenceEngine::ITaskExecutor::Ptr
m_callback_executor;  //!< Used to run the post-inference callback in the asynchronous pipeline
InferenceEngine::ITaskExecutor::Ptr
m_sync_callback_executor;  //!< Used to run the post-inference callback in the synchronous pipeline
mutable std::mutex m_mutex;
std::function<void(std::exception_ptr)> m_callback;
};
} // namespace ov
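
The class comment above refers to an example for an accelerator device that is not shown in this hunk. Below is a minimal, hypothetical sketch of such a derived request: the class name and the submit_to_device()/wait_for_device() helpers are illustrative, while m_pipeline, stop_and_wait() and the constructor signature come from the header above.

// Hypothetical sketch, not part of this change: MyPluginAsyncInferRequest,
// submit_to_device() and wait_for_device() are illustrative names.
#include <memory>

#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"

class MyPluginAsyncInferRequest : public ov::IAsyncInferRequest {
public:
    MyPluginAsyncInferRequest(const std::shared_ptr<ov::ISyncInferRequest>& request,
                              const InferenceEngine::ITaskExecutor::Ptr& task_executor,
                              const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
        : ov::IAsyncInferRequest(request, task_executor, callback_executor) {
        // Replace the default single-stage pipeline with device-specific stages.
        // Each stage is an {executor, task} pair; the stages run one after another.
        m_pipeline = {
            {task_executor, [this] { submit_to_device(); }},
            {task_executor, [this] { wait_for_device(); }},  // a real plugin would usually use a dedicated wait executor
        };
    }

    ~MyPluginAsyncInferRequest() {
        // Base-class contract: wait for all started pipelines before the derived
        // context captured by the tasks above is destroyed.
        stop_and_wait();
    }

private:
    void submit_to_device() { /* enqueue inputs on the accelerator (illustrative) */ }
    void wait_for_device() { /* wait for completion and fetch outputs (illustrative) */ }
};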

View File

@ -13,15 +13,14 @@
#include <ostream>
#include <vector>
#include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace InferenceEngine {
class IInferRequestInternal;
class ICompiledModelWrapper;
} // namespace InferenceEngine
@ -30,6 +29,7 @@ namespace ov {
class CoreImpl;
class IPlugin;
class IExecutableNetworkWrapper;
class IAsyncInferRequest;
/**
* @brief OpenVINO ICompiledModel interface
@ -73,9 +73,9 @@ public:
/**
* @brief Create infer request
*
* @return Infer request interface
* @return Asynchronous infer request interface
*/
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const;
virtual std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const;
/**
* @brief Export compiled model to stream
@ -141,7 +141,7 @@ protected:
*
* @return Sync infer request
*/
virtual std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const = 0;
virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const = 0;
/**
* @brief Default implementation of create async inter request method
@ -149,11 +149,11 @@ protected:
* @tparam AsyncInferRequestType Async infer request type. InferenceEngine::AsyncInferRequestThreadSafeDefault by
* default
*
* @return Async infer request
* @return Asynchronous infer request
*/
template <typename AsyncInferRequestType = InferenceEngine::AsyncInferRequestThreadSafeDefault>
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_async_infer_request() const {
std::shared_ptr<InferenceEngine::IInferRequestInternal> syncRequestImpl = this->create_sync_infer_request();
template <typename AsyncInferRequestType = ov::IAsyncInferRequest>
std::shared_ptr<ov::IAsyncInferRequest> create_async_infer_request() const {
auto syncRequestImpl = create_sync_infer_request();
return std::make_shared<AsyncInferRequestType>(syncRequestImpl, m_task_executor, m_callback_executor);
}
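
The default create_infer_request() simply calls create_async_infer_request(), so a plugin usually only overrides the two hooks above. A hypothetical sketch of that wiring is given below; MyPluginCompiledModel and the included plugin header are illustrative, and MyPluginSyncInferRequest / MyPluginAsyncInferRequest refer to the request sketches shown next to the new request headers in this change.

// Hypothetical sketch, not part of this change; all MyPlugin* names are illustrative.
#include <memory>

#include "my_plugin_infer_requests.hpp"  // hypothetical plugin header defining the two request classes
#include "openvino/runtime/icompiled_model.hpp"

class MyPluginCompiledModel : public ov::ICompiledModel {
public:
    using ov::ICompiledModel::ICompiledModel;  // reuse the base-class constructors

    // Optional: choose a plugin-specific asynchronous wrapper. Without this override,
    // create_infer_request() wraps the synchronous request into ov::IAsyncInferRequest
    // through the create_async_infer_request() helper shown above.
    std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override {
        return create_async_infer_request<MyPluginAsyncInferRequest>();
    }

protected:
    // Mandatory hook: supply the synchronous request that does the real work.
    // The definition (not shown) constructs a MyPluginSyncInferRequest bound to this model.
    std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;

    // Other ICompiledModel methods a real plugin implements (export_model, properties,
    // runtime model) are omitted for brevity.
};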

View File

@ -12,6 +12,7 @@
#include <memory>
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/tensor.hpp"
#include "so_ptr.hpp"

View File

@ -0,0 +1,121 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime InferRequest interface
* @file openvino/runtime/iinfer_request.hpp
*/
#pragma once
#include <exception>
#include <memory>
#include <unordered_map>
#include <vector>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
namespace ov {
class IAsyncInferRequest;
class ICompiledModel;
class OPENVINO_RUNTIME_API IInferRequest {
public:
virtual ~IInferRequest();
/**
* @brief Infers the specified input(s) in synchronous mode
* @note Blocks all methods of InferRequest while the request is ongoing (running or waiting in a queue)
*/
virtual void infer() = 0;
/**
* @brief Queries performance measures per layer to identify the most time consuming operation.
* @note Not all plugins provide meaningful data.
* @return Vector of profiling information for operations in a model.
*/
virtual std::vector<ov::ProfilingInfo> get_profiling_info() const = 0;
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
virtual ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const = 0;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
virtual void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) = 0;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
virtual std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const = 0;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
virtual void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) = 0;
/**
* @brief Gets state control interface for the given infer request.
*
* State control is essential for recurrent models.
* @return Vector of Variable State objects.
*/
virtual std::vector<ov::VariableState> query_state() const = 0;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
virtual const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const = 0;
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
virtual const std::vector<ov::Output<const ov::Node>>& get_inputs() const = 0;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
virtual const std::vector<ov::Output<const ov::Node>>& get_outputs() const = 0;
protected:
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
virtual void check_tensors() const = 0;
friend IAsyncInferRequest;
};
}; // namespace ov

View File

@ -0,0 +1,153 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime SyncInferRequest interface
* @file openvino/runtime/isync_infer_request.hpp
*/
#pragma once
#include <exception>
#include <memory>
#include <openvino/runtime/tensor.hpp>
#include <unordered_map>
#include <vector>
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/profiling_info.hpp"
namespace ov {
/**
* @brief Interface for a synchronous infer request
*/
class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest {
public:
/**
* @brief Constructs a synchronous inference request
*
* @param compiled_model Pointer to the compiled model
*/
ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model);
/**
* @brief Gets an input/output tensor for inference.
* @note If the tensor with the specified @p port is not found, an exception is thrown.
* @param port Port of the tensor to get.
* @return Tensor for the port @p port.
*/
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets an input/output tensor to infer.
* @param port Port of the input or output tensor.
* @param tensor Reference to a tensor. The element_type and shape of a tensor must match
* the model's input/output element_type and size.
*/
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
/**
* @brief Gets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
* @return vector of tensors
*/
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override;
/**
* @brief Sets a batch of tensors for input data to infer by input port.
* Model input must have batch dimension, and the number of @p tensors must match the batch size.
* The current version supports setting tensors to model inputs only. If @p port is associated
* with output (or any other non-input node), an exception is thrown.
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override;
/**
* @brief Plugin implementation for set tensors
*
* @param port Port of the input tensor.
* @param tensors Input tensors for batched infer request. The type of each tensor must match the model
* input element type and shape (except batch dimension). Total size of tensors must match the input size.
*/
virtual void set_tensors_impl(const ov::Output<const ov::Node> port, const std::vector<ov::Tensor>& tensors);
/**
* @brief Gets inputs for infer request
*
* @return vector of input ports
*/
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override;
/**
* @brief Gets outputs for infer request
*
* @return vector of output ports
*/
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override;
/**
* @brief Gets pointer to compiled model (usually synchronous request holds the compiled model)
*
* @return Pointer to the compiled model
*/
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override;
protected:
struct FoundPort {
size_t idx;
enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;
bool found() {
return type != Type::NOT_FOUND;
}
bool is_input() {
return type == Type::INPUT;
}
bool is_output() {
return !is_input();
}
};
/**
* @brief Finds input or output port
* @return Structure that contains the index of the input/output port or reports that the port was not found
*/
FoundPort find_port(const ov::Output<const ov::Node>& port) const;
/**
* @brief Converts batched tensors to a single tensor
*/
void convert_batched_tensors();
/**
* @brief Basic checks for input/output tensor
*
* @param port Input/Output port
* @param tensor Input/Output tensor
*/
void check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const;
/**
* @brief Checks that all tensors are valid. Throws an exception if they are not.
*/
void check_tensors() const override;
std::vector<ov::Tensor> m_input_tensors;
std::vector<ov::Tensor> m_output_tensors;
std::unordered_map<size_t, std::vector<ov::Tensor>> m_batched_tensors;
private:
std::shared_ptr<ov::ICompiledModel> m_compiled_model;
};
}; // namespace ov
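
For reference, a minimal hypothetical synchronous request built on this interface might look as follows; MyPluginSyncInferRequest and run_on_device() are illustrative names, while check_tensors(), convert_batched_tensors() and the protected tensor members come from the header above.

// Hypothetical sketch, not part of this change; MyPluginSyncInferRequest and
// run_on_device() are illustrative names.
#include <memory>
#include <vector>

#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/variable_state.hpp"

class MyPluginSyncInferRequest : public ov::ISyncInferRequest {
public:
    explicit MyPluginSyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
        : ov::ISyncInferRequest(compiled_model) {}

    void infer() override {
        // Base-class helpers: validate the tensors and merge tensors that were set
        // per batch item via set_tensors() into single input tensors.
        check_tensors();
        convert_batched_tensors();
        run_on_device();  // reads m_input_tensors, writes m_output_tensors (illustrative)
    }

    std::vector<ov::ProfilingInfo> get_profiling_info() const override {
        return {};  // this sketch does not collect per-layer statistics
    }

    std::vector<ov::VariableState> query_state() const override {
        return {};  // no stateful operations in this sketch
    }

private:
    void run_on_device() { /* device-specific execution, illustrative placeholder */ }
};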

View File

@ -19,20 +19,17 @@
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/variable_state.hpp"
namespace InferenceEngine {
class IInferRequestInternal;
} // namespace InferenceEngine
namespace ov {
class CompiledModel;
class IAsyncInferRequest;
/**
* @brief This is a class of infer request that can be run in asynchronous or synchronous manners.
* @ingroup ov_runtime_cpp_api
*/
class OPENVINO_RUNTIME_API InferRequest {
std::shared_ptr<InferenceEngine::IInferRequestInternal> _impl;
std::shared_ptr<ov::IAsyncInferRequest> _impl;
std::shared_ptr<void> _so;
/**
@ -41,7 +38,7 @@ class OPENVINO_RUNTIME_API InferRequest {
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if a plugin object
* is destroyed.
*/
InferRequest(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& impl, const std::shared_ptr<void>& so);
InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so);
friend class ov::CompiledModel;
public:

View File

@ -31,6 +31,7 @@ class Core;
class CoreImpl;
class Plugin;
class IPlugin;
class ISyncInferRequest;
class IInferencePluginWrapper;
class IExecutableNetworkWrapper;
class CompiledModel;
@ -62,6 +63,7 @@ protected:
friend class ov::CoreImpl;
friend class ov::Plugin;
friend class ov::IPlugin;
friend class ov::ISyncInferRequest;
friend class ov::IInferencePluginWrapper;
friend class ov::IExecutableNetworkWrapper;
friend class ov::CompiledModel;

View File

@ -17,11 +17,13 @@
namespace InferenceEngine {
class IVariableStateInternal;
class IAsyncInferRequestWrapper;
} // namespace InferenceEngine
namespace ov {
class InferRequest;
class IInferRequestInternalWrapper;
/**
* @brief VariableState class
@ -41,6 +43,8 @@ class OPENVINO_RUNTIME_API VariableState {
const std::vector<std::shared_ptr<void>>& so);
friend class ov::InferRequest;
friend class ov::IInferRequestInternalWrapper;
friend class InferenceEngine::IAsyncInferRequestWrapper;
public:
/**

View File

@ -19,24 +19,6 @@
#include "openvino/runtime/infer_request.hpp"
#include "transformations/utils/utils.hpp"
namespace {
inline bool getPort(ov::Output<const ov::Node>& port,
const std::string& name,
const std::vector<std::vector<std::shared_ptr<const ov::Node>>>& ports) {
for (const auto& nodes : ports) {
for (const auto& node : nodes) {
const auto& names = node->get_output_tensor(0).get_names();
if (names.find(name) != names.end()) {
port = node->output(0);
return true;
}
}
}
return false;
}
} // namespace
namespace InferenceEngine {
#define INFER_REQ_CALL_STATEMENT(...) \
@ -48,18 +30,6 @@ namespace InferenceEngine {
::InferenceEngine::details::Rethrow(); \
}
#define OV_INFER_REQ_CALL_STATEMENT(...) \
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
try { \
__VA_ARGS__; \
} catch (const ::InferenceEngine::RequestBusy& ex) { \
throw ov::Busy(ex.what()); \
} catch (const std::exception& ex) { \
throw ov::Exception(ex.what()); \
} catch (...) { \
OPENVINO_ASSERT(false, "Unexpected exception"); \
}
InferRequest::~InferRequest() {
_impl = {};
}
@ -237,301 +207,3 @@ bool InferRequest::operator==(const InferRequest& r) const noexcept {
}
} // namespace InferenceEngine
namespace {
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
p = node->input_value(0);
}
return ov::op::util::create_ie_output_name(p);
}
} // namespace
namespace ov {
InferRequest::~InferRequest() {
_impl = {};
}
InferRequest::InferRequest(const ie::IInferRequestInternal::Ptr& impl, const std::shared_ptr<void>& so)
: _impl{impl},
_so{so} {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
}
void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlob(get_legacy_name_from_port(port), tensor._impl); });
}
void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
}
void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
"Port for tensor name " + name + " was not found.");
set_tensor(port, tensor);
});
}
void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs()}),
"set_tensors error. Input port for tensor name ",
name,
" was not found.");
set_tensors(port, tensors);
})
}
void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
auto impls = std::vector<InferenceEngine::Blob::Ptr>();
std::transform(tensors.begin(), tensors.end(), std::back_inserter(impls), [](const Tensor& item) {
return item._impl;
});
OV_INFER_REQ_CALL_STATEMENT({ _impl->SetBlobs(get_legacy_name_from_port(port), impls); })
}
void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->GetInputs();
OPENVINO_ASSERT(inputs.size() > idx,
"Input port for index ",
idx,
" was not found! The model has only ",
inputs.size(),
" inputs.");
set_tensor(inputs.at(idx)->output(0), tensor);
});
}
void InferRequest::set_input_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->GetInputs();
OPENVINO_ASSERT(inputs.size() == 1,
"set_input_tensor() must be called on a function with exactly one parameter.");
set_tensor(inputs.at(0)->output(0), tensor);
});
}
void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(idx < _impl->GetInputs().size(),
"set_input_tensors error. Input port for index ",
idx,
" is out of bounds. Model has only ",
_impl->GetInputs().size(),
" inputs");
set_tensors(_impl->GetInputs().at(idx)->output(0), tensors);
})
}
void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->GetInputs().size() == 1,
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
_impl->GetInputs().size(),
" inputs");
set_tensors(_impl->GetInputs().at(0)->output(0), tensors);
})
}
void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->GetOutputs();
OPENVINO_ASSERT(outputs.size() > idx,
"Output port for index ",
idx,
" was not found! The model has only ",
outputs.size(),
" outputs.");
set_tensor(outputs.at(idx)->output(0), tensor);
});
}
void InferRequest::set_output_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->GetOutputs();
OPENVINO_ASSERT(outputs.size() == 1,
"set_output_tensor() must be called on a function with exactly one parameter.");
set_tensor(outputs.at(0)->output(0), tensor);
});
}
Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
const auto& name = get_legacy_name_from_port(port);
OPENVINO_ASSERT(!_impl->GetBlobs(name),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for name '",
name,
"'");
auto blob = _impl->GetBlob(name);
soVec = {_so, _impl->getPointerToSo()};
Tensor tensor = {blob, soVec};
return tensor;
});
}
Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
}
Tensor InferRequest::get_tensor(const std::string& name) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->GetInputs(), _impl->GetOutputs()}),
"Port for tensor name " + name + " was not found.");
return get_tensor(port);
});
}
Tensor InferRequest::get_input_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetInputs().at(idx)->output(0)); });
}
Tensor InferRequest::get_output_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->GetOutputs().at(idx)->output(0)); });
}
Tensor InferRequest::get_input_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->GetInputs();
if (inputs.size() != 1) {
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(inputs.at(0)->output(0));
});
}
Tensor InferRequest::get_output_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->GetOutputs();
if (outputs.size() != 1) {
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(outputs.at(0)->output(0));
});
}
void InferRequest::infer() {
OV_INFER_REQ_CALL_STATEMENT(_impl->Infer();)
}
void InferRequest::cancel() {
OV_INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
}
std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
OV_INFER_REQ_CALL_STATEMENT({
auto ieInfos = _impl->GetPerformanceCounts();
std::vector<ProfilingInfo> infos;
infos.reserve(ieInfos.size());
while (!ieInfos.empty()) {
auto itIeInfo = std::min_element(
std::begin(ieInfos),
std::end(ieInfos),
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
return lhs.second.execution_index < rhs.second.execution_index;
});
IE_ASSERT(itIeInfo != ieInfos.end());
auto& ieInfo = itIeInfo->second;
infos.push_back(ProfilingInfo{});
auto& info = infos.back();
switch (ieInfo.status) {
case ie::InferenceEngineProfileInfo::NOT_RUN:
info.status = ProfilingInfo::Status::NOT_RUN;
break;
case ie::InferenceEngineProfileInfo::OPTIMIZED_OUT:
info.status = ProfilingInfo::Status::OPTIMIZED_OUT;
break;
case ie::InferenceEngineProfileInfo::EXECUTED:
info.status = ProfilingInfo::Status::EXECUTED;
break;
}
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
info.node_name = itIeInfo->first;
info.exec_type = std::string{ieInfo.exec_type};
info.node_type = std::string{ieInfo.layer_type};
ieInfos.erase(itIeInfo);
}
return infos;
})
}
void InferRequest::start_async() {
OV_INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
}
void InferRequest::wait() {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
_impl->Wait(ie::InferRequest::RESULT_READY);
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
return _impl->Wait(timeout.count()) == ie::OK;
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
OV_INFER_REQ_CALL_STATEMENT(_impl->SetCallback(std::move(callback));)
}
std::vector<VariableState> InferRequest::query_state() {
std::vector<VariableState> variable_states;
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
soVec = {_so, _impl->getPointerToSo()};
for (auto&& state : _impl->QueryState()) {
variable_states.emplace_back(VariableState{state, soVec});
}
})
return variable_states;
}
CompiledModel InferRequest::get_compiled_model() {
OV_INFER_REQ_CALL_STATEMENT(
return {ov::legacy_convert::convert_compiled_model(_impl->getPointerToExecutableNetworkInternal()), _so});
}
bool InferRequest::operator!() const noexcept {
return !_impl;
}
InferRequest::operator bool() const noexcept {
return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
return !(r == *this);
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl;
}
} // namespace ov
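
For context, the public asynchronous API backed by these classes is unchanged for applications; a short usage sketch follows (model path and device name are placeholders, and the model is assumed to have a single input and a single output).

// Application-side sketch of the asynchronous ov::InferRequest API; "model.xml"
// and "CPU" are placeholders.
#include <exception>
#include <iostream>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto compiled_model = core.compile_model("model.xml", "CPU");
    ov::InferRequest request = compiled_model.create_infer_request();

    // Assumes a single-input model; fill the input tensor in place.
    ov::Tensor input = request.get_input_tensor();
    // ... write input data ...

    // The callback receives a null exception_ptr on success.
    request.set_callback([](std::exception_ptr ex) {
        if (ex)
            std::cerr << "Inference failed" << std::endl;
    });

    request.start_async();  // returns immediately
    request.wait();         // blocks until the result is ready

    ov::Tensor output = request.get_output_tensor();
    std::cout << "Output byte size: " << output.get_byte_size() << std::endl;
    return 0;
}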

View File

@ -4,36 +4,48 @@
#include "converter_utils.hpp"
#include <ie_blob.h>
#include <ie_common.h>
#include <ie_compound_blob.h>
#include <ie_layouts.h>
#include <fstream>
#include <ie_input_info.hpp>
#include <ie_plugin_config.hpp>
#include <ie_version.hpp>
#include <memory>
#include <openvino/core/except.hpp>
#include <openvino/op/parameter.hpp>
#include <openvino/runtime/exception.hpp>
#include <openvino/runtime/remote_context.hpp>
#include <openvino/runtime/tensor.hpp>
#include <mutex>
#include "any_copy.hpp"
#include "cnn_network_ngraph_impl.hpp"
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "icompiled_model_wrapper.hpp"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_compound_blob.h"
#include "ie_icore.hpp"
#include "ie_input_info.hpp"
#include "ie_layouts.h"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "ie_version.hpp"
#include "iplugin_wrapper.hpp"
#include "openvino/core/except.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "so_ptr.hpp"
#include "transformations/utils/utils.hpp"
namespace {
std::string get_legacy_name_from_port(const ov::Output<const ov::Node>& port) {
ov::Output<ngraph::Node> p(std::const_pointer_cast<ov::Node>(port.get_node_shared_ptr()), port.get_index());
if (auto node = std::dynamic_pointer_cast<ov::op::v0::Result>(p.get_node_shared_ptr())) {
p = node->input_value(0);
}
return ov::op::util::create_ie_output_name(p);
}
void fill_input_info(ov::Output<ov::Node>& input, InferenceEngine::InputInfo::Ptr& input_info) {
const ov::Output<const ov::Node> const_input(input.get_node(), input.get_index());
ov::legacy_convert::fill_input_info(const_input, input_info);
@ -341,7 +353,9 @@ public:
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> CreateInferRequest() override {
return m_model->create_infer_request();
auto infer_request = legacy_convert::convert_infer_request(m_model->create_infer_request());
infer_request->setPointerToExecutableNetworkInternal(shared_from_this());
return infer_request;
}
void Export(std::ostream& model) override {
@ -397,3 +411,312 @@ std::shared_ptr<ov::ICompiledModel> ov::legacy_convert::convert_compiled_model(
}
return std::make_shared<InferenceEngine::ICompiledModelWrapper>(model);
}
namespace ov {
class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestInternal {
ov::Output<const ov::Node> find_port(const std::string& legacy_name) const {
for (const auto& port : m_request->get_inputs()) {
if (get_legacy_name_from_port(port) == legacy_name)
return port;
}
for (const auto& port : m_request->get_outputs()) {
if (get_legacy_name_from_port(port) == legacy_name)
return port;
}
OPENVINO_ASSERT(false, "Cannot find port with name: ", legacy_name);
}
public:
explicit IInferRequestInternalWrapper(const std::shared_ptr<ov::IAsyncInferRequest>& request)
: m_request(request) {}
void Infer() override {
m_request->infer();
}
void Cancel() override {
m_request->cancel();
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override {
auto res = m_request->get_profiling_info();
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> ret;
for (const auto& info : res) {
InferenceEngine::InferenceEngineProfileInfo old_info;
old_info.cpu_uSec = info.cpu_time.count();
old_info.realTime_uSec = info.real_time.count();
strncpy(old_info.exec_type, info.exec_type.c_str(), sizeof(old_info.exec_type));
old_info.exec_type[sizeof(old_info.exec_type) - 1] = 0;
strncpy(old_info.layer_type, info.node_type.c_str(), sizeof(old_info.layer_type));
old_info.layer_type[sizeof(old_info.layer_type) - 1] = 0;
switch (info.status) {
case ov::ProfilingInfo::Status::EXECUTED:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::EXECUTED;
break;
case ov::ProfilingInfo::Status::NOT_RUN:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
old_info.status = InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT;
break;
}
ret[info.node_name] = old_info;
}
return ret;
}
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override {
m_request->set_tensor(find_port(name), ov::Tensor{data, {}});
}
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr>& blobs) override {
std::vector<ov::Tensor> tensors;
for (const auto& blob : blobs) {
tensors.emplace_back(ov::Tensor{blob, {}});
}
m_request->set_tensors(find_port(name), tensors);
}
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override {
return m_request->get_tensor(find_port(name))._impl;
}
InferenceEngine::BatchedBlob::Ptr GetBlobs(const std::string& name) override {
auto tensors = m_request->get_tensors(find_port(name));
std::vector<InferenceEngine::Blob::Ptr> blobs;
for (const auto& tensor : tensors) {
blobs.emplace_back(tensor._impl);
}
return std::make_shared<InferenceEngine::BatchedBlob>(blobs);
}
void SetBlob(const std::string& name,
const InferenceEngine::Blob::Ptr& data,
const InferenceEngine::PreProcessInfo& info) override {
OPENVINO_NOT_IMPLEMENTED;
}
const InferenceEngine::PreProcessInfo& GetPreProcess(const std::string& name) const override {
OPENVINO_NOT_IMPLEMENTED;
}
void SetBatch(int batch) override {
OPENVINO_NOT_IMPLEMENTED;
}
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override {
auto res = m_request->query_state();
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret;
for (const auto& state : res) {
ret.emplace_back(state._impl);
}
return ret;
}
void StartAsync() override {
m_request->start_async();
}
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override {
if (millis_timeout == InferenceEngine::IInferRequest::RESULT_READY) {
m_request->wait();
} else {
std::chrono::milliseconds timeout(millis_timeout);
bool res = m_request->wait_for(timeout);
if (!res)
return InferenceEngine::StatusCode::RESULT_NOT_READY;
}
return InferenceEngine::StatusCode::OK;
}
void SetCallback(std::function<void(std::exception_ptr)> callback) override {
m_request->set_callback(std::move(callback));
}
std::shared_ptr<ov::IAsyncInferRequest> get_infer_request() {
return m_request;
}
private:
std::shared_ptr<ov::IAsyncInferRequest> m_request;
};
} // namespace ov
namespace InferenceEngine {
class IAsyncInferRequestWrapper : public ov::IAsyncInferRequest {
public:
IAsyncInferRequestWrapper(const std::shared_ptr<InferenceEngine::IInferRequestInternal>& request)
: ov::IAsyncInferRequest(nullptr, nullptr, nullptr),
m_request(request) {
if (m_request->getPointerToExecutableNetworkInternal())
m_compiled_model =
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> get_infer_request() {
return m_request;
}
void infer() override {
m_request->Infer();
}
void start_async() override {
m_request->StartAsync();
}
void wait() override {
try {
m_request->Wait(InferenceEngine::InferRequest::RESULT_READY);
} catch (const ov::Cancelled&) {
throw;
} catch (const InferenceEngine::InferCancelled& e) {
throw ov::Cancelled{e.what()};
} catch (const std::exception& ex) {
throw ov::Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool wait_for(const std::chrono::milliseconds& timeout) override {
try {
return m_request->Wait(timeout.count()) == InferenceEngine::OK;
} catch (const InferenceEngine::InferCancelled& e) {
throw ov::Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void cancel() override {
m_request->Cancel();
}
std::vector<ov::ProfilingInfo> get_profiling_info() const override {
auto ieInfos = m_request->GetPerformanceCounts();
std::vector<ov::ProfilingInfo> infos;
infos.reserve(ieInfos.size());
while (!ieInfos.empty()) {
auto itIeInfo = std::min_element(
std::begin(ieInfos),
std::end(ieInfos),
[](const decltype(ieInfos)::value_type& lhs, const decltype(ieInfos)::value_type& rhs) {
return lhs.second.execution_index < rhs.second.execution_index;
});
IE_ASSERT(itIeInfo != ieInfos.end());
auto& ieInfo = itIeInfo->second;
infos.push_back(ov::ProfilingInfo{});
auto& info = infos.back();
switch (ieInfo.status) {
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
info.status = ov::ProfilingInfo::Status::NOT_RUN;
break;
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
info.status = ov::ProfilingInfo::Status::OPTIMIZED_OUT;
break;
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
info.status = ov::ProfilingInfo::Status::EXECUTED;
break;
}
info.real_time = std::chrono::microseconds{ieInfo.realTime_uSec};
info.cpu_time = std::chrono::microseconds{ieInfo.cpu_uSec};
info.node_name = itIeInfo->first;
info.exec_type = std::string{ieInfo.exec_type};
info.node_type = std::string{ieInfo.layer_type};
ieInfos.erase(itIeInfo);
}
return infos;
}
ov::Tensor get_tensor(const ov::Output<const ov::Node>& port) const override {
const auto& name = get_legacy_name_from_port(port);
OPENVINO_ASSERT(!m_request->GetBlobs(name),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for name '",
name,
"'");
auto blob = m_request->GetBlob(name);
ov::Tensor tensor = {blob, {m_request->getPointerToSo()}};
return tensor;
}
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override {
m_request->SetBlob(get_legacy_name_from_port(port), tensor._impl);
}
std::vector<ov::Tensor> get_tensors(const ov::Output<const ov::Node>& port) const override {
auto blobs = m_request->GetBlobs(get_legacy_name_from_port(port));
std::vector<ov::Tensor> ret;
if (!blobs)
return ret;
for (size_t i = 0; i < blobs->size(); i++) {
ret.emplace_back(ov::Tensor{blobs->getBlob(i), {m_request->getPointerToSo()}});
}
return ret;
}
void set_tensors(const ov::Output<const ov::Node>& port, const std::vector<ov::Tensor>& tensors) override {
std::vector<InferenceEngine::Blob::Ptr> blobs;
for (const auto& tensor : tensors) {
blobs.emplace_back(tensor._impl);
}
m_request->SetBlobs(get_legacy_name_from_port(port), blobs);
}
std::vector<ov::VariableState> query_state() const override {
std::vector<ov::VariableState> variable_states;
std::vector<std::shared_ptr<void>> soVec;
soVec = {m_request->getPointerToSo()};
for (auto&& state : m_request->QueryState()) {
variable_states.emplace_back(ov::VariableState{state, soVec});
}
return variable_states;
}
void set_callback(std::function<void(std::exception_ptr)> callback) override {
m_request->SetCallback(std::move(callback));
}
const std::shared_ptr<ov::ICompiledModel>& get_compiled_model() const override {
if (!m_compiled_model) {
std::lock_guard<std::mutex> lock(m_mutex);
if (!m_compiled_model) {
if (m_request->getPointerToExecutableNetworkInternal())
m_compiled_model =
ov::legacy_convert::convert_compiled_model(m_request->getPointerToExecutableNetworkInternal());
}
}
OPENVINO_ASSERT(m_compiled_model);
return m_compiled_model;
}
const std::vector<ov::Output<const ov::Node>>& get_inputs() const override {
return get_compiled_model()->inputs();
}
const std::vector<ov::Output<const ov::Node>>& get_outputs() const override {
return get_compiled_model()->outputs();
}
private:
std::shared_ptr<InferenceEngine::IInferRequestInternal> m_request;
mutable std::shared_ptr<ov::ICompiledModel> m_compiled_model;
mutable std::mutex m_mutex;
};
} // namespace InferenceEngine
std::shared_ptr<::InferenceEngine::IInferRequestInternal> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request) {
if (auto comp_model = std::dynamic_pointer_cast<InferenceEngine::IAsyncInferRequestWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<ov::IInferRequestInternalWrapper>(request);
}
std::shared_ptr<::ov::IAsyncInferRequest> ov::legacy_convert::convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request) {
if (auto comp_model = std::dynamic_pointer_cast<ov::IInferRequestInternalWrapper>(request)) {
return comp_model->get_infer_request();
}
return std::make_shared<InferenceEngine::IAsyncInferRequestWrapper>(request);
}
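
Both helpers detect their own wrapper types and return the wrapped object, so requests do not accumulate wrapper layers when they cross the 1.0/2.0 boundary repeatedly. A small hedged illustration (the demonstrate_unwrap function is hypothetical):

// Hypothetical illustration of the unwrap behaviour of convert_infer_request.
#include <cassert>
#include <memory>

#include "dev/converter_utils.hpp"  // internal header path inside the OpenVINO source tree

void demonstrate_unwrap(const std::shared_ptr<ov::IAsyncInferRequest>& request) {
    // Wrap a 2.0 request into the legacy IInferRequestInternal interface...
    auto legacy = ov::legacy_convert::convert_infer_request(request);
    // ...and convert it back: the wrapper is recognized and the original object is returned.
    auto roundtrip = ov::legacy_convert::convert_infer_request(legacy);
    assert(roundtrip == request);
}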

View File

@ -5,8 +5,10 @@
#pragma once
#include "cpp/ie_cnn_network.h"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "openvino/core/model.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iplugin.hpp"
@ -22,11 +24,18 @@ std::shared_ptr<const ov::Model> convert_model(const InferenceEngine::CNNNetwork
std::shared_ptr<::InferenceEngine::IInferencePlugin> convert_plugin(const std::shared_ptr<::ov::IPlugin>& plugin);
std::shared_ptr<::ov::IPlugin> convert_plugin(const std::shared_ptr<::InferenceEngine::IInferencePlugin>& plugin);
std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal> convert_compiled_model(
const std::shared_ptr<::ov::ICompiledModel>& model);
std::shared_ptr<::ov::ICompiledModel> convert_compiled_model(
const std::shared_ptr<::InferenceEngine::IExecutableNetworkInternal>& model);
// TODO: remove export after changes in template plugin
OPENVINO_RUNTIME_API std::shared_ptr<::InferenceEngine::IInferRequestInternal> convert_infer_request(
const std::shared_ptr<::ov::IAsyncInferRequest>& request);
OPENVINO_RUNTIME_API std::shared_ptr<::ov::IAsyncInferRequest> convert_infer_request(
const std::shared_ptr<::InferenceEngine::IInferRequestInternal>& request);
} // namespace legacy_convert
} // namespace ov

View File

@ -0,0 +1,266 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/iasync_infer_request.hpp"
#include <memory>
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace {
struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor {
explicit ImmediateStreamsExecutor(const InferenceEngine::IStreamsExecutor::Ptr& streamsExecutor)
: _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {
_streamsExecutor->Execute(std::move(task));
}
InferenceEngine::IStreamsExecutor::Ptr _streamsExecutor;
};
} // namespace
ov::IAsyncInferRequest::~IAsyncInferRequest() {
stop_and_wait();
}
ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>& request,
const InferenceEngine::ITaskExecutor::Ptr& task_executor,
const InferenceEngine::ITaskExecutor::Ptr& callback_executor)
: m_sync_request(request),
m_request_executor(task_executor),
m_callback_executor(callback_executor) {
if (m_request_executor && m_sync_request)
m_pipeline = {{m_request_executor, [this] {
m_sync_request->infer();
}}};
if (m_sync_request)
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
m_sync_request->infer();
}}};
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
if (streams_executor != nullptr) {
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
m_sync_request->infer();
}}};
}
}
void ov::IAsyncInferRequest::wait() {
// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();
if (!future.valid()) {
return;
}
future.wait();
}
bool ov::IAsyncInferRequest::wait_for(const std::chrono::milliseconds& timeout) {
OPENVINO_ASSERT(timeout >= std::chrono::milliseconds{0}, "Timeout can't be less than 0 for InferRequest::wait().");
auto status = std::future_status::deferred;
// Just use the last 'm_futures' member to wait for pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock{m_mutex};
return m_futures.empty() ? std::shared_future<void>{} : m_futures.back();
}();
if (!future.valid()) {
return false;
}
status = future.wait_for(std::chrono::milliseconds{timeout});
if (std::future_status::ready == status) {
future.get();
return true;
} else {
return false;
}
}
void ov::IAsyncInferRequest::cancel() {
std::lock_guard<std::mutex> lock{m_mutex};
if (m_state == InferState::BUSY) {
m_state = InferState::CANCELLED;
}
}
void ov::IAsyncInferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
check_state();
m_callback = std::move(callback);
}
std::vector<ov::VariableState> ov::IAsyncInferRequest::query_state() const {
check_state();
return m_sync_request->query_state();
}
void ov::IAsyncInferRequest::infer_thread_unsafe() {
run_first_stage(m_sync_pipeline.begin(), m_sync_pipeline.end(), m_sync_callback_executor);
}
void ov::IAsyncInferRequest::start_async_thread_unsafe() {
run_first_stage(m_pipeline.begin(), m_pipeline.end(), m_callback_executor);
}
void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
auto& firstStageExecutor = std::get<Stage_e::EXECUTOR>(*itBeginStage);
OPENVINO_ASSERT(nullptr != firstStageExecutor);
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
}
InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const InferenceEngine::ITaskExecutor::Ptr callbackExecutor) {
return std::bind(
[this, itStage, itEndStage](InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) mutable {
std::exception_ptr currentException = nullptr;
auto& thisStage = *itStage;
auto itNextStage = itStage + 1;
try {
auto& stageTask = std::get<Stage_e::TASK>(thisStage);
OPENVINO_ASSERT(nullptr != stageTask);
stageTask();
if (itEndStage != itNextStage) {
auto& nextStage = *itNextStage;
auto& nextStageExecutor = std::get<Stage_e::EXECUTOR>(nextStage);
OPENVINO_ASSERT(nullptr != nextStageExecutor);
nextStageExecutor->run(make_next_stage_task(itNextStage, itEndStage, std::move(callbackExecutor)));
}
} catch (...) {
currentException = std::current_exception();
}
if ((itEndStage == itNextStage) || (nullptr != currentException)) {
auto lastStageTask = [this, currentException]() mutable {
auto promise = std::move(m_promise);
std::function<void(std::exception_ptr)> callback;
{
std::lock_guard<std::mutex> lock{m_mutex};
m_state = InferState::IDLE;
std::swap(callback, m_callback);
}
if (callback) {
try {
callback(currentException);
} catch (...) {
currentException = std::current_exception();
}
std::lock_guard<std::mutex> lock{m_mutex};
if (!m_callback) {
std::swap(callback, m_callback);
}
}
if (nullptr == currentException) {
promise.set_value();
} else {
promise.set_exception(currentException);
}
};
if (nullptr == callbackExecutor) {
lastStageTask();
} else {
callbackExecutor->run(std::move(lastStageTask));
}
}
},
std::move(callbackExecutor));
}
void ov::IAsyncInferRequest::start_async() {
infer_impl([&] {
start_async_thread_unsafe();
});
}
void ov::IAsyncInferRequest::check_state() const {
std::lock_guard<std::mutex> lock{m_mutex};
switch (m_state) {
case InferState::BUSY:
throw ov::Busy("Infer Request is busy");
case InferState::CANCELLED:
throw ov::Cancelled("Infer Request was canceled");
default:
break;
}
}
std::vector<ov::ProfilingInfo> ov::IAsyncInferRequest::get_profiling_info() const {
check_state();
return m_sync_request->get_profiling_info();
}
ov::Tensor ov::IAsyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensor(port);
}
void ov::IAsyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
check_state();
return m_sync_request->set_tensor(port, tensor);
}
std::vector<ov::Tensor> ov::IAsyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
check_state();
return m_sync_request->get_tensors(port);
}
void ov::IAsyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
check_state();
return m_sync_request->set_tensors(port, tensors);
}
void ov::IAsyncInferRequest::stop_and_wait() {
Futures futures;
InferState state = InferState::IDLE;
{
std::lock_guard<std::mutex> lock{m_mutex};
state = m_state;
if (state != InferState::STOP) {
m_callback = {};
m_state = InferState::STOP;
futures = std::move(m_futures);
}
}
if (state != InferState::STOP) {
for (auto&& future : futures) {
if (future.valid()) {
future.wait();
}
}
}
}
void ov::IAsyncInferRequest::infer() {
m_sync_request->infer();
}
void ov::IAsyncInferRequest::check_tensors() const {
m_sync_request->check_tensors();
}
const std::shared_ptr<ov::ICompiledModel>& ov::IAsyncInferRequest::get_compiled_model() const {
return m_sync_request->get_compiled_model();
}
const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_inputs() const {
return m_sync_request->get_inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::IAsyncInferRequest::get_outputs() const {
return m_sync_request->get_outputs();
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -78,7 +78,8 @@ const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::outputs() con
const std::vector<ov::Output<const ov::Node>>& ov::ICompiledModel::inputs() const {
return m_inputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> ov::ICompiledModel::create_infer_request() const {
std::shared_ptr<ov::IAsyncInferRequest> ov::ICompiledModel::create_infer_request() const {
return create_async_infer_request();
}

View File

@ -6,6 +6,8 @@
#include <ie_plugin_config.hpp>
#include "dev/converter_utils.hpp"
InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model)
: ov::ICompiledModel(nullptr, ov::legacy_convert::convert_plugin(model->_plugin)),
@ -20,9 +22,9 @@ InferenceEngine::ICompiledModelWrapper::ICompiledModelWrapper(
m_inputs = inputs;
m_outputs = outputs;
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> InferenceEngine::ICompiledModelWrapper::create_infer_request()
const {
return m_model->CreateInferRequest();
std::shared_ptr<ov::IAsyncInferRequest> InferenceEngine::ICompiledModelWrapper::create_infer_request() const {
return ov::legacy_convert::convert_infer_request(m_model->CreateInferRequest());
}
void InferenceEngine::ICompiledModelWrapper::export_model(std::ostream& model) const {

View File

@ -13,7 +13,7 @@ namespace InferenceEngine {
class ICompiledModelWrapper : public ov::ICompiledModel {
public:
ICompiledModelWrapper(const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& model);
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
void export_model(std::ostream& model) const override;
@ -30,7 +30,7 @@ public:
private:
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> m_model;
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override {
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override {
OPENVINO_NOT_IMPLEMENTED;
}
};

View File

@ -0,0 +1,238 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/isync_infer_request.hpp"
#include "cpp_interfaces/plugin_itt.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/layout.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"
namespace {
void check_batched_tensors(const ov::Output<const ov::Node>& input, const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT(!tensors.empty(), "set_input_tensors/set_tensors can't be called with empty tensors");
OPENVINO_ASSERT(
tensors.size() != 1,
"Internal error (plugin): check_batched_tensors is not allowed to have only one tensor inside batch");
auto layout = ov::layout::get_layout(input);
OPENVINO_ASSERT(ov::layout::has_batch(layout),
"set_input_tensors/set_tensors can be used only for inputs with N(batch) dimension"
" 'layout' defined. Current layout is ",
layout.to_string());
auto batch_idx = ov::layout::batch_idx(layout);
if (batch_idx < 0) {
// TODO: Do we need this logic?
batch_idx += static_cast<int64_t>(tensors[0].get_shape().size());
}
OPENVINO_ASSERT(batch_idx == 0,
"set_input_tensors/set_tensors is not currently supported for batch dimension index ",
batch_idx,
" != 0");
std::for_each(tensors.begin(), tensors.end(), [&batch_idx](const ov::Tensor& item) {
OPENVINO_ASSERT(item.get_shape()[batch_idx] == 1,
"set_input_tensors/set_tensors. Tensors shall represent one item in a batch, ",
item.get_shape()[batch_idx],
" provided");
});
auto tensors_size = static_cast<int>(tensors.size());
if (input.get_partial_shape().rank().is_static()) {
OPENVINO_ASSERT(batch_idx >= 0 && batch_idx < input.get_partial_shape().rank().get_length(),
"set_input_tensors/set_tensors error. Layout ",
layout.to_string(),
" is incorrect for operation with shape ",
input.get_partial_shape());
auto batch = input.get_partial_shape()[batch_idx];
OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
"set_input_tensors/set_tensors error. Input shape ",
input.get_partial_shape(),
"batch ",
batch,
"doesn't match with total blobs count: ",
tensors_size);
}
// In future consider checking if blobs point to contiguous range of memory and use single 'SetBlob' instead
auto batched_shape = tensors[0].get_shape();
auto element_type = tensors[0].get_element_type();
batched_shape[batch_idx] = tensors_size;
for (const auto& item : tensors) {
auto item_shape = item.get_shape();
item_shape[batch_idx] = batched_shape[batch_idx];
OPENVINO_ASSERT(item_shape == batched_shape && item.get_element_type() == element_type,
"set_input_tensors/set_tensors error. Tensor with element type ",
item.get_element_type(),
" and shape ",
item_shape,
" is not compatible with batched tensor with element type ",
element_type,
" and shape ",
batched_shape);
}
}
} // namespace
ov::IInferRequest::~IInferRequest() = default;
ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr<ov::ICompiledModel>& compiled_model)
: m_compiled_model(compiled_model) {}
const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_inputs() const {
return m_compiled_model->inputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::ISyncInferRequest::get_outputs() const {
return m_compiled_model->outputs();
}
const std::shared_ptr<ov::ICompiledModel>& ov::ISyncInferRequest::get_compiled_model() const {
return m_compiled_model;
}
ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Output<const ov::Node>& port) const {
ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT;
for (const auto& ports : {get_inputs(), get_outputs()}) {
for (size_t i = 0; i < ports.size(); i++) {
if (ports[i] == port) {
return {i, type};
}
}
type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT;
}
return {0, ov::ISyncInferRequest::FoundPort::Type::NOT_FOUND};
}
void ov::ISyncInferRequest::convert_batched_tensors() {
for (const auto& item : m_batched_tensors) {
auto tmp_shape = item.second.at(0).get_shape();
auto tmp_et = item.second.at(0).get_element_type();
tmp_shape[0] = item.second.size();
ov::RemoteContext remote_context;
ov::Tensor input_tensor;
try {
auto net = get_compiled_model();
if (net) {
remote_context = net->get_context();
}
} catch (const ov::NotImplemented&) {
}
if (remote_context._impl) {
input_tensor = remote_context.create_host_tensor(tmp_et, tmp_shape);
} else {
input_tensor = ov::Tensor(tmp_et, tmp_shape);
}
auto ptr = input_tensor.data<uint8_t>();
// Perform memory copy
ov::parallel_for(item.second.size(), [&](size_t i) {
const auto& tensor = item.second.at(i);
memcpy(ptr + i * tensor.get_byte_size(), tensor.data<uint8_t>(), tensor.get_byte_size());
});
set_tensor(get_inputs()[item.first], input_tensor);
}
}
ov::Tensor ov::ISyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
if (found_port.is_input()) {
auto input = m_compiled_model->inputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (input.get_partial_shape().is_dynamic())
return m_input_tensors.at(found_port.idx);
}
auto output = m_compiled_model->outputs().at(found_port.idx);
// TODO: Support dynamic inputs
// if (output.get_partial_shape().is_dynamic())
return m_output_tensors.at(found_port.idx);
}
void ov::ISyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensor");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found(), "Cannot find tensor for port ", port);
OPENVINO_ASSERT(
port.get_element_type() == tensor.get_element_type(),
"Failed to set output tensor, the tensor element type is not corresponding with output element type");
OPENVINO_ASSERT(port.get_partial_shape().is_dynamic() || tensor.get_shape() == port.get_shape(),
"Input tensor size is not equal with model input size (",
tensor.get_shape(),
" != ",
port.get_shape(),
").");
if (found_port.is_input()) {
m_input_tensors.at(found_port.idx) = tensor;
m_batched_tensors.erase(found_port.idx);
} else {
m_output_tensors.at(found_port.idx) = tensor;
}
}
std::vector<ov::Tensor> ov::ISyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "get_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (m_batched_tensors.count(found_port.idx))
return m_batched_tensors.at(found_port.idx);
return {};
}
void ov::ISyncInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
const std::vector<ov::Tensor>& tensors) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::Plugin, "set_tensors");
auto found_port = find_port(port);
OPENVINO_ASSERT(!found_port.found() && found_port.is_input(), "Cannot find input tensors for port ", port);
if (tensors.size() == 1) {
set_tensor(port, tensors[0]);
return;
}
check_batched_tensors(port, tensors);
set_tensors_impl(port, tensors);
}
void ov::ISyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
const std::vector<ov::Tensor>& tensors) {
OPENVINO_ASSERT_HELPER(::ov::NotImplemented,
"",
false,
"Not Implemented",
"set_input_tensors/set_tensors are not supported by this plugin");
}
void ov::ISyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) const {
bool is_input = ov::op::util::is_parameter(port.get_node());
std::string tensor_type = is_input ? "input" : "output";
bool is_dynamic = port.get_partial_shape().is_dynamic();
OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor.get_shape(),
"The ",
tensor_type,
" tensor size is not equal to the model ",
tensor_type,
" type: got ",
tensor.get_size(),
" expecting ",
port.get_shape(),
".");
}
void ov::ISyncInferRequest::check_tensors() const {
const auto& inputs = m_compiled_model->inputs();
for (size_t i = 0; i < inputs.size(); i++) {
check_tensor(inputs[i], m_input_tensors[i]);
}
const auto& outputs = m_compiled_model->outputs();
for (size_t i = 0; i < outputs.size(); i++) {
check_tensor(outputs[i], m_output_tensors[i]);
}
}
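
For orientation, a hedged sketch of how the batched-input path above is typically reached from application code; the model path, device name, shapes, and the assumptions that the model has a single input carrying a layout with 'N' at dimension 0 and that the target plugin implements set_tensors_impl() are all illustrative, not taken from this commit.

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // hypothetical model/device
    auto request = compiled.create_infer_request();

    // One tensor per batch element; each has 1 in the batch dimension,
    // which is exactly what check_batched_tensors() above validates.
    std::vector<ov::Tensor> items;
    for (size_t i = 0; i < 4; ++i)
        items.emplace_back(ov::element::f32, ov::Shape{1, 3, 224, 224});

    // Routed through InferRequest::set_input_tensors -> ISyncInferRequest::set_tensors;
    // a plugin's set_tensors_impl() typically stores the list (e.g. in m_batched_tensors)
    // and convert_batched_tensors() later merges it into a single batched input.
    request.set_input_tensors(items);
    request.infer();
    return 0;
}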

View File

@ -0,0 +1,295 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/infer_request.hpp"
#include <map>
#include <memory>
#include <string>
#include "ie_common.h"
#include "openvino/core/node.hpp"
#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/exception.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "transformations/utils/utils.hpp"
#define OV_INFER_REQ_CALL_STATEMENT(...) \
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized."); \
try { \
__VA_ARGS__; \
} catch (const ::InferenceEngine::RequestBusy& ex) { \
throw ov::Busy(ex.what()); \
} catch (const std::exception& ex) { \
throw ov::Exception(ex.what()); \
} catch (...) { \
OPENVINO_ASSERT(false, "Unexpected exception"); \
}
namespace {
inline bool getPort(ov::Output<const ov::Node>& res_port,
const std::string& name,
const std::vector<std::vector<ov::Output<const ov::Node>>>& vector_ports) {
for (const auto& ports : vector_ports) {
for (const auto& port : ports) {
const auto& names = port.get_names();
if (names.find(name) != names.end()) {
res_port = port;
return true;
}
}
}
return false;
}
} // namespace
namespace ov {
InferRequest::~InferRequest() {
_impl = {};
}
InferRequest::InferRequest(const std::shared_ptr<ov::IAsyncInferRequest>& impl, const std::shared_ptr<void>& so)
: _impl{impl},
_so{so} {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
}
void InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensor(port, tensor); });
}
void InferRequest::set_tensor(const ov::Output<ov::Node>& port, const Tensor& tensor) {
set_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()), tensor);
}
void InferRequest::set_tensor(const std::string& name, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
set_tensor(port, tensor);
});
}
void InferRequest::set_tensors(const std::string& name, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs()}),
"set_tensors error. Input port for tensor name ",
name,
" was not found.");
set_tensors(port, tensors);
})
}
void InferRequest::set_tensors(const ov::Output<const ov::Node>& port, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({ _impl->set_tensors(port, tensors); })
}
void InferRequest::set_input_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() > idx,
"Input port for index ",
idx,
" was not found! The model has only ",
inputs.size(),
" inputs.");
set_tensor(inputs.at(idx), tensor);
});
}
void InferRequest::set_input_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& inputs = _impl->get_inputs();
OPENVINO_ASSERT(inputs.size() == 1,
"set_input_tensor() must be called on a function with exactly one parameter.");
set_tensor(inputs.at(0), tensor);
});
}
void InferRequest::set_input_tensors(size_t idx, const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(idx < _impl->get_inputs().size(),
"set_input_tensors error. Input port for index ",
idx,
" is out of bounds. Model has only ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(idx), tensors);
})
}
void InferRequest::set_input_tensors(const std::vector<Tensor>& tensors) {
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_inputs().size() == 1,
"set_input_tensors(tensors) must be used for single-input models only. Model has ",
_impl->get_inputs().size(),
" inputs");
set_tensors(_impl->get_inputs().at(0), tensors);
})
}
void InferRequest::set_output_tensor(size_t idx, const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() > idx,
"Output port for index ",
idx,
" was not found! The model has only ",
outputs.size(),
" outputs.");
set_tensor(outputs.at(idx), tensor);
});
}
void InferRequest::set_output_tensor(const Tensor& tensor) {
OV_INFER_REQ_CALL_STATEMENT({
const auto& outputs = _impl->get_outputs();
OPENVINO_ASSERT(outputs.size() == 1,
"set_output_tensor() must be called on a function with exactly one parameter.");
set_tensor(outputs.at(0), tensor);
});
}
Tensor InferRequest::get_tensor(const ov::Output<const ov::Node>& port) {
std::vector<std::shared_ptr<void>> soVec;
OV_INFER_REQ_CALL_STATEMENT({
OPENVINO_ASSERT(_impl->get_tensors(port).empty(),
"get_tensor shall not be used together with batched "
"set_tensors/set_input_tensors for port '",
port,
"'");
auto tensor = _impl->get_tensor(port);
tensor._so.emplace_back(_so);
return tensor;
});
}
Tensor InferRequest::get_tensor(const ov::Output<ov::Node>& port) {
return get_tensor(ov::Output<const ov::Node>(port.get_node(), port.get_index()));
}
Tensor InferRequest::get_tensor(const std::string& name) {
OV_INFER_REQ_CALL_STATEMENT({
ov::Output<const ov::Node> port;
OPENVINO_ASSERT(::getPort(port, name, {_impl->get_inputs(), _impl->get_outputs()}),
"Port for tensor name " + name + " was not found.");
return get_tensor(port);
});
}
Tensor InferRequest::get_input_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_inputs().at(idx)); });
}
Tensor InferRequest::get_output_tensor(size_t idx) {
OV_INFER_REQ_CALL_STATEMENT({ return get_tensor(_impl->get_outputs().at(idx)); });
}
Tensor InferRequest::get_input_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto inputs = _impl->get_inputs();
if (inputs.size() != 1) {
throw ov::Exception("get_input_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(inputs.at(0));
});
}
Tensor InferRequest::get_output_tensor() {
OV_INFER_REQ_CALL_STATEMENT({
const auto outputs = _impl->get_outputs();
if (outputs.size() != 1) {
throw ov::Exception("get_output_tensor() must be called on a function with exactly one parameter.");
}
return get_tensor(outputs.at(0));
});
}
void InferRequest::infer() {
OV_INFER_REQ_CALL_STATEMENT(_impl->infer());
}
void InferRequest::cancel() {
OV_INFER_REQ_CALL_STATEMENT(_impl->cancel());
}
std::vector<ProfilingInfo> InferRequest::get_profiling_info() const {
OV_INFER_REQ_CALL_STATEMENT(return _impl->get_profiling_info());
}
void InferRequest::start_async() {
OV_INFER_REQ_CALL_STATEMENT(_impl->start_async());
}
void InferRequest::wait() {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
_impl->wait();
} catch (const ov::Cancelled&) {
throw;
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
bool InferRequest::wait_for(const std::chrono::milliseconds timeout) {
OPENVINO_ASSERT(_impl != nullptr, "InferRequest was not initialized.");
try {
return _impl->wait_for(timeout);
} catch (const ie::InferCancelled& e) {
throw Cancelled{e.what()};
} catch (const std::exception& ex) {
throw Exception(ex.what());
} catch (...) {
OPENVINO_UNREACHABLE("Unexpected exception");
}
}
void InferRequest::set_callback(std::function<void(std::exception_ptr)> callback) {
OV_INFER_REQ_CALL_STATEMENT(_impl->set_callback(std::move(callback));)
}
std::vector<VariableState> InferRequest::query_state() {
std::vector<VariableState> variable_states;
OV_INFER_REQ_CALL_STATEMENT({
for (auto&& state : _impl->query_state()) {
auto soVec = state._so;
soVec.emplace_back(_so);
variable_states.emplace_back(ov::VariableState{state._impl, soVec});
}
})
return variable_states;
}
CompiledModel InferRequest::get_compiled_model() {
OV_INFER_REQ_CALL_STATEMENT(return {_impl->get_compiled_model(), _so});
}
bool InferRequest::operator!() const noexcept {
return !_impl;
}
InferRequest::operator bool() const noexcept {
return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
return !(r == *this);
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl;
}
} // namespace ov
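
Finally, a short end-to-end sketch of the asynchronous public API implemented above; the model path, device name, tensor shape, and the single-input/single-output assumption are placeholders for illustration.

#include <chrono>
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // hypothetical model/device
    auto request = compiled.create_infer_request();

    ov::Tensor input(ov::element::f32, ov::Shape{1, 3, 224, 224});
    request.set_input_tensor(input);  // single-input model assumed

    // The callback receives the exception pointer produced by the last pipeline stage.
    request.set_callback([](std::exception_ptr ex) {
        if (ex)
            std::cerr << "Inference failed" << std::endl;
    });

    request.start_async();
    if (!request.wait_for(std::chrono::milliseconds(5))) {
        // Not finished within 5 ms; block until the result is ready.
        request.wait();
    }

    auto output = request.get_output_tensor();  // single-output model assumed
    std::cout << "Output element count: " << output.get_size() << std::endl;
    return 0;
}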

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -26,6 +26,7 @@ ov_mark_target_as_cc(${TARGET_NAME})
target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${OpenVINO_SOURCE_DIR}/src/inference/src/dev" # TODO: remove after migration to new infer request
"${TEMPLATE_PLUGIN_SOURCE_DIR}/include")
# link common Inference Engine libraries

View File

@ -6,8 +6,12 @@
#include <memory>
#include "converter_utils.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "plugin.hpp"
#include "template/config.hpp"
#include "template_async_infer_request.hpp"
@ -124,17 +128,8 @@ void TemplatePlugin::CompiledModel::compile_model(const std::shared_ptr<ov::Mode
// ! [executable_network:map_graph]
// ! [executable_network:create_infer_request]
std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_infer_request() const {
auto internal_request = create_sync_infer_request();
return std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());
}
std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::CompiledModel::create_sync_infer_request()
const {
std::shared_ptr<ov::IAsyncInferRequest> TemplatePlugin::CompiledModel::create_infer_request() const {
// auto internal_request = create_sync_infer_request();
std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
for (const auto& output : m_model->inputs()) {
_inputs.emplace_back(output.get_node_shared_ptr());
@ -143,10 +138,36 @@ std::shared_ptr<InferenceEngine::IInferRequestInternal> TemplatePlugin::Compiled
_outputs.emplace_back(output.get_node_shared_ptr());
}
return std::make_shared<TemplateInferRequest>(
auto internal_request = std::make_shared<TemplateInferRequest>(
_inputs,
_outputs,
std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
auto async_infer_request = std::make_shared<TemplateAsyncInferRequest>(
std::static_pointer_cast<TemplatePlugin::TemplateInferRequest>(internal_request),
get_task_executor(),
get_template_plugin()->_waitExecutor,
get_callback_executor());
async_infer_request->setPointerToExecutableNetworkInternal(
ov::legacy_convert::convert_compiled_model(std::const_pointer_cast<ov::ICompiledModel>(shared_from_this())));
return ov::legacy_convert::convert_infer_request(async_infer_request);
}
std::shared_ptr<ov::ISyncInferRequest> TemplatePlugin::CompiledModel::create_sync_infer_request() const {
OPENVINO_NOT_IMPLEMENTED;
// std::vector<std::shared_ptr<const ov::Node>> _inputs, _outputs;
// for (const auto& output : m_model->inputs()) {
// _inputs.emplace_back(output.get_node_shared_ptr());
// }
// for (const auto& output : outputs()) {
// _outputs.emplace_back(output.get_node_shared_ptr());
// }
//
// return std::make_shared<TemplateInferRequest>(
// _inputs,
// _outputs,
// std::static_pointer_cast<const TemplatePlugin::CompiledModel>(shared_from_this()));
}
// ! [executable_network:create_infer_request]

View File

@ -5,6 +5,9 @@
#pragma once
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/tensor.hpp"
#include "template_config.hpp"
#include "template_infer_request.hpp"
@ -34,10 +37,10 @@ public:
virtual ov::Any get_property(const std::string& name) const override;
ov::RemoteContext get_context() const override;
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_infer_request() const override;
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
protected:
std::shared_ptr<InferenceEngine::IInferRequestInternal> create_sync_infer_request() const override;
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
private:
friend class TemplateInferRequest;