[PYTHON API] update InferQueue (#8513)
* Bind exec core ov (#50)
* Output const node python tests (#52)
* add python bindings tests for Output<const ov::Node>
* add proper tests
* add new line
* rename ie_version to version
* Pszmel/bind infer request (#51)
* remove set_batch, get_blob and set_blob
* update InferRequest class
* change InferenceEngine::InferRequest to ov::runtime::InferRequest
* update set_callback body
* update bindings to reflect ov::runtime::InferRequest
* bind set_input_tensor and get_input_tensor
* style fix
* clean ie_infer_queue.cpp
* Bind exec core ov (#50)
* bind core, exec_net classes
* rm unused function
* add new line
* rename ie_infer_request -> infer_request
* update imports
* update __init__.py
* update ie_api.py
* Replace old containers with the new one
* create impl for create_infer_request
* comment out infer_queue to avoid errors with old infer_request
* update infer_request bind to reflect new infer_request api
* comment out input_info from ie_network to avoid errors with old containers
* Register new containers and comment out InferQueue
* update infer request tests
* style fix
* remove unused imports
* remove unused imports and 2 methods
* add tests to cover all new methods from infer_request
* style fix
* add test
* remove registration of InferResults
* update name of exception_ptr parameter
* update the loops that iterate through inputs and outputs
* clean setCustomCallbacks
* style fix
* add Tensor import
* style fix
* update infer and normalize_inputs
* style fix
* rename startTime and endTime
* Create test for mixed keys as infer arguments
* update infer function
* update return type of infer
Co-authored-by: Bartek Szmelczynski <bartosz.szmelczynski@intel.com>
* fix get_version
* fix opaque issue
* some cosmetic changes
* fix codestyle in tests
* make tests green
* Extend python InferRequest
* Extend python Function
* Change return value of infer call
* Fix missing precisions conversions in CPU plugin
* Rework of runtime for new tests
* Fixed onnx reading in python tests
* Edit compatibility tests
* Edit tests
* Add FLOAT_LIKE xfails
* [Python API] bind ProfilingInfo (#55)
* bind ProfilingInfo
* Add tests
* Fix code style
* Add property
* fix codestyle
* Infer new request method (#56)
* fix conflicts, add infer_new_request function
* remove redundant functions, fix style
* revert the unwanted changes
* revert removal of the Blob
* revert removal of isTblob
* add add_extension from path
* codestyle
* fix win build
* add inputs-outputs to function
* update infer queue
* fix code style
* Hot-fix CPU plugin with precision
* fix start_async
* add performance hint to time infer (#8480)
* Updated common migration pipeline (#8176)
* Updated common migration pipeline
* Fixed merge issue
* Added new model and extended example
* Fixed typo
* Added v10-v11 comparison
* Avoid redundant graph nodes scans (#8415)
* Refactor work with env variables (#8208)
* del MO_ROOT
* del MO_ROOT from common_utils.py
* add MO_PATH to common_utils.py
* change mo_path
* [IE Sample Scripts] Use cmake to build samples (#8442)
* Use cmake to build samples
* Add the option to set custom build output folder
* Remove opset8 from compatibility ngraph python API (#8452)
* [GPU] OneDNN gpu submodule update to version 2.5 (#8449)
* [GPU] OneDNN gpu submodule update to version 2.5
* [GPU] Updated onednn submodule and added layout optimizer fix
* Install rules for static libraries case (#8384)
* Proper cmake install for static libraries case
* Added an ability to skip template plugin
* Added install rules for VPU / GPU
* Install more libraries
* Fixed absolute TBB include paths
* Disable GNA
* Fixed issue with linker
* Some fixes
* Fixed linkage issues in tests
* Disabled some tests
* Updated CI pipelines
* Fixed Windows linkage
* Fixed custom_opset test for static case
* Fixed CVS-70313
* Continue on error
* Fixed clang-format
* Try to fix Windows linker
* Fixed compilation
* Disable samples
* Fixed samples build with THREADING=SEQ
* Fixed link error on Windows
* Fixed ieFuncTests
* Added static Azure CI
* Revert "Fixed link error on Windows"
This reverts commit 78cca36fd2.
* Merge static and dynamic linux pipelines
* Fixed Azure
* fix codestyle
* rename all methods in this class to snake_case
* some updates
* code style
* fix code style in tests
* compute latency in callback
* Fix get_idle_request
* fix latency
* Fix code style
Co-authored-by: Bartek Szmelczynski <bartosz.szmelczynski@intel.com>
Co-authored-by: Anastasia Kuporosova <anastasia.kuporosova@intel.com>
Co-authored-by: Piotr Szmelczynski <piotr.szmelczynski@intel.com>
Co-authored-by: jiwaszki <jan.iwaszkiewicz@intel.com>
Co-authored-by: Victor Kuznetsov <victor.kuznetsov@intel.com>
Co-authored-by: Ilya Churaev <ilya.churaev@intel.com>
Co-authored-by: Tomasz Jankowski <tomasz1.jankowski@intel.com>
Co-authored-by: Dmitry Pigasin <dmitry.pigasin@intel.com>
Co-authored-by: Artur Kulikowski <artur.kulikowski@intel.com>
Co-authored-by: Ilya Znamenskiy <ilya.znamenskiy@intel.com>
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
This commit is contained in:
parent e6884c3fd7
commit 4a1cfdc9ff
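
The user-facing result of this patch is the new AsyncInferQueue Python class. A minimal usage sketch, mirroring the test added at the bottom of this diff (the model path, the "data" input name, and the input shape are illustrative, not fixed by the API):

import numpy as np
from openvino import Core, AsyncInferQueue

core = Core()
func = core.read_model("model.xml")               # illustrative path
exec_net = core.compile_model(func, "CPU")

infer_queue = AsyncInferQueue(exec_net, 4)        # 4 parallel requests; 0 = pick the optimal count
latencies = [None] * 8

def callback(request, job_id):                    # invoked from each request's done-callback
    latencies[job_id] = request.latency

infer_queue.set_callback(callback)
img = np.zeros((1, 3, 32, 32), dtype=np.float32)  # illustrative input
for i in range(8):
    infer_queue.start_async({"data": img}, i)     # blocks only when no request is idle
infer_queue.wait_all()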
@@ -36,7 +36,7 @@ from openvino.pyopenvino import InputInfoCPtr
 from openvino.pyopenvino import DataPtr
 from openvino.pyopenvino import TensorDesc
 from openvino.pyopenvino import get_version
-#from openvino.pyopenvino import InferQueue
+from openvino.pyopenvino import AsyncInferQueue
 from openvino.pyopenvino import InferRequest  # TODO: move to ie_api?
 from openvino.pyopenvino import Blob
 from openvino.pyopenvino import PreProcessInfo
@@ -83,5 +83,5 @@ ExecutableNetwork.infer_new_request = infer_new_request
 # Patching InferRequest
 InferRequest.infer = infer
 InferRequest.start_async = start_async
-# Patching InferQueue
-#InferQueue.async_infer = async_infer
+# Patching AsyncInferQueue
+AsyncInferQueue.start_async = start_async
@@ -3,7 +3,7 @@
 
 import numpy as np
 import copy
-from typing import List
+from typing import List, Union
 
 from openvino.pyopenvino import TBlobFloat32
 from openvino.pyopenvino import TBlobFloat64
@@ -17,6 +17,7 @@ from openvino.pyopenvino import TBlobInt8
 from openvino.pyopenvino import TBlobUint8
 from openvino.pyopenvino import TensorDesc
 from openvino.pyopenvino import InferRequest
+from openvino.pyopenvino import AsyncInferQueue
 from openvino.pyopenvino import ExecutableNetwork
 from openvino.pyopenvino import Tensor
 
@@ -57,7 +58,7 @@ def infer_new_request(exec_net: ExecutableNetwork, inputs: dict = None) -> List[
     return [copy.deepcopy(tensor.data) for tensor in res]
 
 # flake8: noqa: D102
-def start_async(request: InferRequest, inputs: dict = {}, userdata: dict = None) -> None:  # type: ignore
+def start_async(request: Union[InferRequest, AsyncInferQueue], inputs: dict = {}, userdata: dict = None) -> None:  # type: ignore
     request._start_async(inputs=normalize_inputs(inputs), userdata=userdata)
 
 # flake8: noqa: C901
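
Both InferRequest and AsyncInferQueue expose _start_async from the C++ bindings, so this single Union-typed wrapper serves both after the patching shown earlier. A hedged call-site sketch (request, infer_queue, and img assumed created as elsewhere in this diff):

request.start_async({0: img})               # InferRequest: inputs keyed by index
infer_queue.start_async({"data": img}, 0)   # AsyncInferQueue: inputs by name, plus a userdata job id
# Both paths run normalize_inputs() before dispatching to the C++ _start_async.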
@@ -0,0 +1,205 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "pyopenvino/core/async_infer_queue.hpp"
+
+#include <ie_common.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "pyopenvino/core/common.hpp"
+#include "pyopenvino/core/infer_request.hpp"
+
+namespace py = pybind11;
+
+class AsyncInferQueue {
+public:
+    AsyncInferQueue(std::vector<InferRequestWrapper> requests,
+                    std::queue<size_t> idle_handles,
+                    std::vector<py::object> user_ids)
+        : _requests(requests),
+          _idle_handles(idle_handles),
+          _user_ids(user_ids) {
+        this->set_default_callbacks();
+    }
+
+    ~AsyncInferQueue() {
+        _requests.clear();
+    }
+
+    bool _is_ready() {
+        py::gil_scoped_release release;
+        std::unique_lock<std::mutex> lock(_mutex);
+        _cv.wait(lock, [this] {
+            return !(_idle_handles.empty());
+        });
+
+        return !(_idle_handles.empty());
+    }
+
+    size_t get_idle_request_id() {
+        // Wait for any of _idle_handles
+        py::gil_scoped_release release;
+        std::unique_lock<std::mutex> lock(_mutex);
+        _cv.wait(lock, [this] {
+            return !(_idle_handles.empty());
+        });
+
+        return _idle_handles.front();
+    }
+
+    void wait_all() {
+        // Wait for all requests to return with callback, thus updating
+        // _idle_handles so it matches the size of requests
+        py::gil_scoped_release release;
+        std::unique_lock<std::mutex> lock(_mutex);
+        _cv.wait(lock, [this] {
+            return _idle_handles.size() == _requests.size();
+        });
+    }
+
+    void set_default_callbacks() {
+        for (size_t handle = 0; handle < _requests.size(); handle++) {
+            _requests[handle]._request.set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
+                _requests[handle]._end_time = Time::now();
+                // Add idle handle to queue
+                _idle_handles.push(handle);
+                // Notify locks in get_idle_request_id() or wait_all()
+                _cv.notify_one();
+            });
+        }
+    }
+
+    void set_custom_callbacks(py::function f_callback) {
+        for (size_t handle = 0; handle < _requests.size(); handle++) {
+            _requests[handle]._request.set_callback([this, f_callback, handle](std::exception_ptr exception_ptr) {
+                _requests[handle]._end_time = Time::now();
+                try {
+                    if (exception_ptr) {
+                        std::rethrow_exception(exception_ptr);
+                    }
+                } catch (const std::exception& e) {
+                    throw ov::Exception(e.what());
+                }
+                // Acquire GIL, execute Python function
+                py::gil_scoped_acquire acquire;
+                f_callback(_requests[handle], _user_ids[handle]);
+                // Add idle handle to queue
+                _idle_handles.push(handle);
+                // Notify locks in get_idle_request_id() or wait_all()
+                _cv.notify_one();
+            });
+        }
+    }
+
+    std::vector<InferRequestWrapper> _requests;
+    std::queue<size_t> _idle_handles;
+    std::vector<py::object> _user_ids;  // user ID can be any Python object
+    std::mutex _mutex;
+    std::condition_variable _cv;
+};
+
+void regclass_AsyncInferQueue(py::module m) {
+    py::class_<AsyncInferQueue, std::shared_ptr<AsyncInferQueue>> cls(m, "AsyncInferQueue");
+
+    cls.def(py::init([](ov::runtime::ExecutableNetwork& net, size_t jobs) {
+                if (jobs == 0) {
+                    jobs = (size_t)Common::get_optimal_number_of_requests(net);
+                }
+
+                std::vector<InferRequestWrapper> requests;
+                std::queue<size_t> idle_handles;
+                std::vector<py::object> user_ids(jobs);
+
+                for (size_t handle = 0; handle < jobs; handle++) {
+                    auto request = InferRequestWrapper(net.create_infer_request());
+                    // Get inputs and outputs info from the executable network
+                    request._inputs = net.inputs();
+                    request._outputs = net.outputs();
+
+                    requests.push_back(request);
+                    idle_handles.push(handle);
+                }
+
+                return new AsyncInferQueue(requests, idle_handles, user_ids);
+            }),
+            py::arg("network"),
+            py::arg("jobs") = 0);
+
+    cls.def(
+        "_start_async",
+        [](AsyncInferQueue& self, const py::dict inputs, py::object userdata) {
+            // get_idle_request_id() blocks until there is at least one
+            // idle (free to use) InferRequest
+            auto handle = self.get_idle_request_id();
+            self._idle_handles.pop();
+            // Set new inputs label/id from user
+            self._user_ids[handle] = userdata;
+            // Update inputs if there are any
+            if (!inputs.empty()) {
+                if (py::isinstance<std::string>(inputs.begin()->first)) {
+                    auto inputs_map = Common::cast_to_tensor_name_map(inputs);
+                    for (auto&& input : inputs_map) {
+                        self._requests[handle]._request.set_tensor(input.first, input.second);
+                    }
+                } else if (py::isinstance<int>(inputs.begin()->first)) {
+                    auto inputs_map = Common::cast_to_tensor_index_map(inputs);
+                    for (auto&& input : inputs_map) {
+                        self._requests[handle]._request.set_input_tensor(input.first, input.second);
+                    }
+                }
+            }
+            // Now the GIL can be released - we are NOT working with Python objects in this block
+            {
+                py::gil_scoped_release release;
+                self._requests[handle]._start_time = Time::now();
+                // Start InferRequest in asynchronous mode
+                self._requests[handle]._request.start_async();
+            }
+        },
+        py::arg("inputs"),
+        py::arg("userdata"));
+
+    cls.def("is_ready", [](AsyncInferQueue& self) {
+        return self._is_ready();
+    });
+
+    cls.def("wait_all", [](AsyncInferQueue& self) {
+        return self.wait_all();
+    });
+
+    cls.def("get_idle_request_id", [](AsyncInferQueue& self) {
+        return self.get_idle_request_id();
+    });
+
+    cls.def("set_callback", [](AsyncInferQueue& self, py::function f_callback) {
+        self.set_custom_callbacks(f_callback);
+    });
+
+    cls.def("__len__", [](AsyncInferQueue& self) {
+        return self._requests.size();
+    });
+
+    cls.def(
+        "__iter__",
+        [](AsyncInferQueue& self) {
+            return py::make_iterator(self._requests.begin(), self._requests.end());
+        },
+        py::keep_alive<0, 1>()); /* Keep set alive while iterator is used */
+
+    cls.def("__getitem__", [](AsyncInferQueue& self, size_t i) {
+        return self._requests[i];
+    });
+
+    cls.def_property_readonly("userdata", [](AsyncInferQueue& self) {
+        return self._user_ids;
+    });
+}
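
The scheduling logic above boils down to a condition variable guarding a queue of free request slots. For readers new to this pattern, here is a minimal pure-Python model of the same bookkeeping; IdleHandlePool and its method names are invented for illustration and are not part of the OpenVINO API, and the pop that _start_async performs separately is folded into the getter here:

import threading
from collections import deque

class IdleHandlePool:
    """Toy model of the C++ idle-handle bookkeeping (illustrative only)."""

    def __init__(self, jobs: int) -> None:
        self._idle = deque(range(jobs))    # mirrors _idle_handles: every slot starts idle
        self._jobs = jobs                  # mirrors _requests.size()
        self._cv = threading.Condition()   # mirrors the _mutex/_cv pair

    def get_idle_handle(self) -> int:
        # get_idle_request_id + pop: block until a request slot is free, then claim it
        with self._cv:
            self._cv.wait_for(lambda: len(self._idle) > 0)
            return self._idle.popleft()

    def release_handle(self, handle: int) -> None:
        # The done-callback path: return the slot and wake one waiter
        with self._cv:
            self._idle.append(handle)
            self._cv.notify()

    def wait_all(self) -> None:
        # Block until every slot is idle again, i.e. all in-flight work has finished
        with self._cv:
            self._cv.wait_for(lambda: len(self._idle) == self._jobs)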
@@ -7,4 +7,4 @@
 
 namespace py = pybind11;
 
-void regclass_InferQueue(py::module m);
+void regclass_AsyncInferQueue(py::module m);
@@ -321,13 +321,13 @@ void set_request_blobs(InferenceEngine::InferRequest& request, const py::dict& d
     }
 }
 
-uint32_t get_optimal_number_of_requests(const InferenceEngine::ExecutableNetwork& actual) {
+uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual) {
     try {
-        auto parameter_value = actual.GetMetric(METRIC_KEY(SUPPORTED_METRICS));
+        auto parameter_value = actual.get_metric(METRIC_KEY(SUPPORTED_METRICS));
         auto supported_metrics = parameter_value.as<std::vector<std::string>>();
         const std::string key = METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS);
         if (std::find(supported_metrics.begin(), supported_metrics.end(), key) != supported_metrics.end()) {
-            parameter_value = actual.GetMetric(key);
+            parameter_value = actual.get_metric(key);
             if (parameter_value.is<unsigned int>())
                 return parameter_value.as<unsigned int>();
             else
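
This helper backs the jobs=0 default of the new queue constructor shown above. A hedged usage sketch from the Python side (the model path is illustrative):

from openvino import Core, AsyncInferQueue

core = Core()
func = core.read_model("model.xml")        # illustrative path
exec_net = core.compile_model(func, "CPU")

# jobs=0 falls through to get_optimal_number_of_requests(), i.e. the
# OPTIMAL_NUMBER_OF_INFER_REQUESTS metric when the device reports it
infer_queue = AsyncInferQueue(exec_net, 0)
print(len(infer_queue))                    # how many requests were actually created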
@@ -15,6 +15,7 @@
 #include "Python.h"
 #include "ie_common.h"
 #include "openvino/runtime/tensor.hpp"
+#include "openvino/runtime/executable_network.hpp"
 #include "pyopenvino/core/containers.hpp"
 
 namespace py = pybind11;
@@ -60,5 +61,5 @@ namespace Common
 
     void set_request_blobs(InferenceEngine::InferRequest& request, const py::dict& dictonary);
 
-    uint32_t get_optimal_number_of_requests(const InferenceEngine::ExecutableNetwork& actual);
+    uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual);
 };  // namespace Common
@@ -1,228 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "pyopenvino/core/ie_infer_queue.hpp"
-
-#include <ie_common.h>
-#include <pybind11/functional.h>
-#include <pybind11/stl.h>
-
-#include <chrono>
-#include <condition_variable>
-#include <cpp/ie_executable_network.hpp>
-#include <cpp/ie_infer_request.hpp>
-#include <ie_iinfer_request.hpp>
-#include <mutex>
-#include <queue>
-#include <string>
-#include <vector>
-
-#include "pyopenvino/core/common.hpp"
-#include "pyopenvino/core/infer_request.hpp"
-
-#define INVALID_ID -1
-
-namespace py = pybind11;
-
-class InferQueue {
-public:
-    InferQueue(std::vector<InferRequestWrapper> requests,
-               std::queue<size_t> idle_handles,
-               std::vector<py::object> user_ids)
-        : _requests(requests),
-          _idle_handles(idle_handles),
-          _user_ids(user_ids) {
-        this->setDefaultCallbacks();
-        _last_id = -1;
-    }
-
-    ~InferQueue() {
-        _requests.clear();
-    }
-
-    bool _is_ready() {
-        py::gil_scoped_release release;
-        std::unique_lock<std::mutex> lock(_mutex);
-        _cv.wait(lock, [this] {
-            return !(_idle_handles.empty());
-        });
-
-        return !(_idle_handles.empty());
-    }
-
-    py::dict _getIdleRequestInfo() {
-        py::gil_scoped_release release;
-        std::unique_lock<std::mutex> lock(_mutex);
-        _cv.wait(lock, [this] {
-            return !(_idle_handles.empty());
-        });
-
-        size_t request_id = _idle_handles.front();
-
-        py::dict request_info = py::dict();
-        request_info["id"] = request_id;
-        // request_info["status"] = true; // TODO
-
-        return request_info;
-    }
-
-    size_t getIdleRequestId() {
-        // Wait for any of _idle_handles
-        py::gil_scoped_release release;
-        std::unique_lock<std::mutex> lock(_mutex);
-        _cv.wait(lock, [this] {
-            return !(_idle_handles.empty());
-        });
-
-        size_t idle_request_id = _idle_handles.front();
-        _idle_handles.pop();
-
-        return idle_request_id;
-    }
-
-    std::vector<bool> waitAll() {
-        // Wait for all requests to return with callback thus updating
-        // _idle_handles so it matches the size of requests
-        py::gil_scoped_release release;
-        std::unique_lock<std::mutex> lock(_mutex);
-        _cv.wait(lock, [this] {
-            return _idle_handles.size() == _requests.size();
-        });
-
-        std::vector<bool> statuses;
-
-        for (size_t handle = 0; handle < _requests.size(); handle++) {
-            statuses.push_back(_requests[handle]._request.wait_for(std::chrono::milliseconds(0)));
-        }
-
-        return statuses;
-    }
-
-    void setDefaultCallbacks() {
-        for (size_t handle = 0; handle < _requests.size(); handle++) {
-            _requests[handle]._request.set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
-                _requests[handle]._end_time = Time::now();
-                // Add idle handle to queue
-                _idle_handles.push(handle);
-                // Notify locks in getIdleRequestId() or waitAll() functions
-                _cv.notify_one();
-            });
-        }
-    }
-
-    void setCustomCallbacks(py::function f_callback) {
-        for (size_t handle = 0; handle < _requests.size(); handle++) {
-            _requests[handle]._request.set_callback([this, f_callback, handle](std::exception_ptr exception_ptr) {
-                _requests[handle]._end_time = Time::now();
-                try {
-                    if (exception_ptr) {
-                        std::rethrow_exception(exception_ptr);
-                    }
-                } catch (const std::exception& e) {
-                    IE_THROW() << "Caught exception: " << e.what();
-                }
-                // Acquire GIL, execute Python function
-                py::gil_scoped_acquire acquire;
-                f_callback(_requests[handle], _user_ids[handle]);
-                // Add idle handle to queue
-                _idle_handles.push(handle);
-                // Notify locks in getIdleRequestId() or waitAll() functions
-                _cv.notify_one();
-            });
-        }
-    }
-
-    std::vector<InferRequestWrapper> _requests;
-    std::queue<size_t> _idle_handles;
-    std::vector<py::object> _user_ids;  // user ID can be any Python object
-    size_t _last_id;
-    std::mutex _mutex;
-    std::condition_variable _cv;
-};
-
-// void regclass_InferQueue(py::module m) {
-//     py::class_<InferQueue, std::shared_ptr<InferQueue>> cls(m, "InferQueue");
-
-//     cls.def(py::init([](InferenceEngine::ExecutableNetwork& net, size_t jobs) {
-//         if (jobs == 0) {
-//             const InferenceEngine::ExecutableNetwork& _net = net;
-//             jobs = (size_t)Common::get_optimal_number_of_requests(_net);
-//         }
-
-//         std::vector<InferRequestWrapper> requests;
-//         std::queue<size_t> idle_handles;
-//         std::vector<py::object> user_ids(jobs);
-
-//         for (size_t handle = 0; handle < jobs; handle++) {
-//             auto request = InferRequestWrapper(net.CreateInferRequest());
-//             // Get Inputs and Outputs info from executable network
-//             request._inputsInfo = net.GetInputsInfo();
-//             request._outputsInfo = net.GetOutputsInfo();
-
-//             requests.push_back(request);
-//             idle_handles.push(handle);
-//         }
-
-//         return new InferQueue(requests, idle_handles, user_ids);
-//     }),
-//     py::arg("network"),
-//     py::arg("jobs") = 0);
-
-//     cls.def(
-//         "_async_infer",
-//         [](InferQueue& self, const py::dict inputs, py::object userdata) {
-//             // getIdleRequestId function has an intention to block InferQueue
-//             // until there is at least one idle (free to use) InferRequest
-//             auto handle = self.getIdleRequestId();
-//             // Set new inputs label/id from user
-//             self._user_ids[handle] = userdata;
-//             // Update inputs of picked InferRequest
-//             if (!inputs.empty()) {
-//                 Common::set_request_blobs(self._requests[handle]._request, inputs);
-//             }
-//             // Now GIL can be released - we are NOT working with Python objects in this block
-//             {
-//                 py::gil_scoped_release release;
-//                 self._requests[handle]._start_time = Time::now();
-//                 // Start InferRequest in asynchronus mode
-//                 self._requests[handle]._request.start_async();
-//             }
-//         },
-//         py::arg("inputs"),
-//         py::arg("userdata"));
-
-//     cls.def("is_ready", [](InferQueue& self) {
-//         return self._is_ready();
-//     });
-
-//     cls.def("wait_all", [](InferQueue& self) {
-//         return self.waitAll();
-//     });
-
-//     cls.def("get_idle_request_info", [](InferQueue& self) {
-//         return self._getIdleRequestInfo();
-//     });
-
-//     cls.def("set_infer_callback", [](InferQueue& self, py::function f_callback) {
-//         self.setCustomCallbacks(f_callback);
-//     });
-
-//     cls.def("__len__", [](InferQueue& self) {
-//         return self._requests.size();
-//     });
-
-//     cls.def(
-//         "__iter__",
-//         [](InferQueue& self) {
-//             return py::make_iterator(self._requests.begin(), self._requests.end());
-//         },
-//         py::keep_alive<0, 1>()); /* Keep set alive while iterator is used */
-
-//     cls.def("__getitem__", [](InferQueue& self, size_t i) {
-//         return self._requests[i];
-//     });
-
-//     cls.def_property_readonly("userdata", [](InferQueue& self) {
-//         return self._user_ids;
-//     });
-// }
@@ -20,11 +20,15 @@ public:
     InferRequestWrapper(ov::runtime::InferRequest request)
         : _request(request)
     {
+        // AsyncInferQueue uses this constructor - the latency-computing callback is set there
     }
 
     InferRequestWrapper(ov::runtime::InferRequest request, const std::vector<ov::Output<const ov::Node>>& inputs, const std::vector<ov::Output<const ov::Node>>& outputs)
         : _request(request), _inputs(inputs), _outputs(outputs)
     {
+        _request.set_callback([this](std::exception_ptr exception_ptr) {
+            _end_time = Time::now();
+        });
     }
     // ~InferRequestWrapper() = default;
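
The two constructors split responsibility for latency timing: standalone requests stamp _end_time in their own default callback, while queue-owned requests get it from AsyncInferQueue's callbacks. A hedged sketch of what this enables, reusing exec_net and img as in the tests below:

request = exec_net.create_infer_request()
request.infer({0: img})
# _start_time/_end_time are stamped around the run, so the elapsed time is available:
assert request.latency > 0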
@@ -19,12 +19,12 @@
 #if defined(NGRAPH_ONNX_FRONTEND_ENABLE)
 #    include "pyopenvino/graph/onnx_import/onnx_import.hpp"
 #endif
+#include "pyopenvino/core/async_infer_queue.hpp"
 #include "pyopenvino/core/containers.hpp"
 #include "pyopenvino/core/core.hpp"
 #include "pyopenvino/core/executable_network.hpp"
 #include "pyopenvino/core/ie_blob.hpp"
 #include "pyopenvino/core/ie_data.hpp"
-#include "pyopenvino/core/ie_infer_queue.hpp"
 #include "pyopenvino/core/ie_input_info.hpp"
 #include "pyopenvino/core/ie_network.hpp"
 #include "pyopenvino/core/ie_parameter.hpp"
@@ -127,7 +127,7 @@ PYBIND11_MODULE(pyopenvino, m) {
     regclass_Version(m);
     regclass_Parameter(m);
    regclass_InputInfo(m);
-    // regclass_InferQueue(m);
+    regclass_AsyncInferQueue(m);
     regclass_ProfilingInfo(m);
     regclass_PreProcessInfo(m);
@@ -8,7 +8,7 @@ import datetime
 import time
 
 from ..conftest import image_path, model_path
-from openvino import Core, Tensor, ProfilingInfo
+from openvino import Core, AsyncInferQueue, Tensor, ProfilingInfo
 
 is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
 test_net_xml, test_net_bin = model_path(is_myriad)
@@ -35,6 +35,7 @@ def test_get_profiling_info(device):
     img = read_image()
     request = exec_net.create_infer_request()
     request.infer({0: img})
+    assert request.latency > 0
     prof_info = request.get_profiling_info()
     soft_max_node = next(node for node in prof_info if node.node_name == "fc_out")
     assert soft_max_node.node_type == "Softmax"
@@ -168,6 +169,7 @@ def test_start_async(device):
         request.start_async({0: img})
     for request in requests:
         request.wait()
+        assert request.latency > 0
     assert callbacks_info["finished"] == jobs
@@ -187,3 +189,26 @@ def test_infer_mixed_keys(device):
     with pytest.raises(TypeError) as e:
         request.infer({0: tensor, "fc_out": tensor2})
     assert "incompatible function arguments!" in str(e.value)
+
+
+def test_infer_queue(device):
+    jobs = 8
+    num_request = 4
+    core = Core()
+    func = core.read_model(test_net_xml, test_net_bin)
+    exec_net = core.compile_model(func, device)
+    infer_queue = AsyncInferQueue(exec_net, num_request)
+    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]
+
+    def callback(request, job_id):
+        jobs_done[job_id]["finished"] = True
+        jobs_done[job_id]["latency"] = request.latency
+
+    img = read_image()
+    infer_queue.set_callback(callback)
+    assert infer_queue.is_ready()
+    for i in range(jobs):
+        infer_queue.start_async({"data": img}, i)
+    infer_queue.wait_all()
+    assert all(job["finished"] for job in jobs_done)
+    assert all(job["latency"] > 0 for job in jobs_done)