diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index 26088c169c4..22cf00c85e7 100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -44,13 +44,10 @@ def get_input_types(obj: Union[InferRequestBase, ExecutableNetworkBase]) -> dict class InferRequest(InferRequestBase): """InferRequest wrapper.""" - def infer(self, inputs: dict = None) -> List[np.ndarray]: + def infer(self, inputs: dict = None) -> dict: """Infer wrapper for InferRequest.""" inputs = {} if inputs is None else normalize_inputs(inputs, get_input_types(self)) - res = super().infer(inputs) - # Required to return list since np.ndarray forces all of tensors data to match in - # dimensions. This results in errors when running ops like variadic split. - return [copy.deepcopy(tensor.data) for tensor in res] + return super().infer(inputs) def start_async(self, inputs: dict = None, userdata: Any = None) -> None: """Asynchronous infer wrapper for InferRequest.""" @@ -65,13 +62,10 @@ class ExecutableNetwork(ExecutableNetworkBase): """Create new InferRequest object.""" return InferRequest(super().create_infer_request()) - def infer_new_request(self, inputs: dict = None) -> List[np.ndarray]: + def infer_new_request(self, inputs: dict = None) -> dict: """Infer wrapper for ExecutableNetwork.""" inputs = {} if inputs is None else normalize_inputs(inputs, get_input_types(self)) - res = super().infer_new_request(inputs) - # Required to return list since np.ndarray forces all of tensors data to match in - # dimensions. This results in errors when running ops like variadic split. 
- return [copy.deepcopy(tensor.data) for tensor in res] + return super().infer_new_request(inputs) class AsyncInferQueue(AsyncInferQueueBase): diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 6ef5774163a..afcbb3d7b2a 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -296,4 +296,83 @@ uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& ac } } +py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs, ov::runtime::InferRequest& request) { + py::dict res; + for (const auto& out : outputs) { + ov::runtime::Tensor t{request.get_tensor(out)}; + std::cout << t.get_element_type() << " !\n"; + switch (t.get_element_type()) { + case ov::element::Type_t::i8: { + py::array arr(t.get_shape(), t.data<int8_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::i16: { + py::array arr(t.get_shape(), t.data<int16_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::i32: { + py::array arr(t.get_shape(), t.data<int32_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::i64: { + py::array arr(t.get_shape(), t.data<int64_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::u8: { + py::array arr(t.get_shape(), t.data<uint8_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::u16: { + py::array arr(t.get_shape(), t.data<uint16_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::u32: { + py::array arr(t.get_shape(), t.data<uint32_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::u64: { + py::array arr(t.get_shape(), t.data<uint64_t>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::bf16: { + py::array arr(t.get_shape(), t.data<int16_t>()); + res[py::cast(out)] = arr.view("int16"); + break; + } + case ov::element::Type_t::f16: { + py::array arr(t.get_shape(), t.data<int16_t>()); + res[py::cast(out)] = arr.view("int16"); + break; +
} + case ov::element::Type_t::f32: { + py::array arr(t.get_shape(), t.data<float>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::f64: { + py::array arr(t.get_shape(), t.data<double>()); + res[py::cast(out)] = arr; + break; + } + case ov::element::Type_t::boolean: { + py::array arr(t.get_shape(), t.data<bool>()); + res[py::cast(out)] = arr; + break; + } + default: { + break; + } + } + } + return res; +} + }; // namespace Common diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 68d28ec6396..7be421b3434 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -44,6 +44,8 @@ namespace Common uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual); + py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs, ov::runtime::InferRequest& request); + // Use only with classes that are not creatable by users on Python's side, because // Objects created in Python that are wrapped with such wrapper will cause memory leaks.
template <typename T> diff --git a/src/bindings/python/src/pyopenvino/core/containers.hpp b/src/bindings/python/src/pyopenvino/core/containers.hpp index e24e7336236..268ce68ff06 100644 --- a/src/bindings/python/src/pyopenvino/core/containers.hpp +++ b/src/bindings/python/src/pyopenvino/core/containers.hpp @@ -17,7 +17,6 @@ namespace py = pybind11; namespace Containers { using TensorIndexMap = std::map<size_t, ov::runtime::Tensor>; using TensorNameMap = std::map<std::string, ov::runtime::Tensor>; - using InferResults = std::vector<ov::runtime::Tensor>; void regclass_TensorIndexMap(py::module m); void regclass_TensorNameMap(py::module m); diff --git a/src/bindings/python/src/pyopenvino/core/executable_network.cpp b/src/bindings/python/src/pyopenvino/core/executable_network.cpp index 37d6811f38d..116582465fc 100644 --- a/src/bindings/python/src/pyopenvino/core/executable_network.cpp +++ b/src/bindings/python/src/pyopenvino/core/executable_network.cpp @@ -36,12 +36,7 @@ void regclass_ExecutableNetwork(py::module m) { // Update inputs if there are any Common::set_request_tensors(request, inputs); request.infer(); - - Containers::InferResults results; - for (const auto out : self.outputs()) { - results.push_back(request.get_tensor(out)); - } - return results; + return Common::outputs_to_dict(self.outputs(), request); }, py::arg("inputs")); diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index 762a272ae83..14dc2befeef 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -65,11 +65,8 @@ void regclass_InferRequest(py::module m) { self._start_time = Time::now(); self._request.infer(); self._end_time = Time::now(); - Containers::InferResults results; - for (auto& out : self._outputs) { - results.push_back(self._request.get_tensor(out)); - } - return results; + + return Common::outputs_to_dict(self._outputs, self._request); }, py::arg("inputs")); @@ -271,4 +268,8 @@ void regclass_InferRequest(py::module m)
{ cls.def_property_readonly("profiling_info", [](InferRequestWrapper& self) { return self._request.get_profiling_info(); }); + + cls.def_property_readonly("results", [](InferRequestWrapper& self) { + return Common::outputs_to_dict(self._outputs, self._request); + }); } diff --git a/src/bindings/python/tests/runtime.py b/src/bindings/python/tests/runtime.py index a64425d34bc..d263723bb81 100644 --- a/src/bindings/python/tests/runtime.py +++ b/src/bindings/python/tests/runtime.py @@ -83,12 +83,13 @@ class Computation(object): def convert_buffers(self, source_buffers, target_dtypes): converted_buffers = [] for i in range(len(source_buffers)): + k = list(source_buffers)[i] target_dtype = target_dtypes[i] # custom conversion for bf16 if self.results[i].get_output_element_type(0) == Type.bf16: - converted_buffers.append((source_buffers[i].view(np.uint32) >> 16).astype(np.uint16)) + converted_buffers.append((source_buffers[k].view(np.uint32) >> 16).astype(np.uint16)) else: - converted_buffers.append(source_buffers[i].astype(target_dtype)) + converted_buffers.append(source_buffers[k].astype(target_dtype)) return converted_buffers def __call__(self, *input_values: NumericData) -> List[NumericData]: diff --git a/src/bindings/python/tests/test_inference_engine/test_core.py b/src/bindings/python/tests/test_inference_engine/test_core.py index 095dcb7e88d..7ba09f09b15 100644 --- a/src/bindings/python/tests/test_inference_engine/test_core.py +++ b/src/bindings/python/tests/test_inference_engine/test_core.py @@ -24,7 +24,7 @@ def test_compact_api_xml(): model = compile_model(test_net_xml) assert(isinstance(model, ExecutableNetwork)) results = model.infer_new_request({"data": img}) - assert np.argmax(results) == 2 + assert np.argmax(results[list(results)[0]]) == 2 def test_compact_api_onnx(): @@ -33,7 +33,7 @@ def test_compact_api_onnx(): model = compile_model(test_net_onnx) assert(isinstance(model, ExecutableNetwork)) results = model.infer_new_request({"data": img}) - assert 
np.argmax(results) == 2 + assert np.argmax(results[list(results)[0]]) == 2 def test_core_class(): @@ -53,8 +53,7 @@ def test_core_class(): input_tensor = Tensor(input_data) results = request.infer({"parameter": input_tensor}) - - assert np.allclose(results, expected_output) + assert np.allclose(results[list(results)[0]], expected_output) def test_compile_model(device): diff --git a/src/bindings/python/tests/test_inference_engine/test_executable_network.py b/src/bindings/python/tests/test_inference_engine/test_executable_network.py index 1396cf4a99d..e958f5b3e8a 100644 --- a/src/bindings/python/tests/test_inference_engine/test_executable_network.py +++ b/src/bindings/python/tests/test_inference_engine/test_executable_network.py @@ -231,7 +231,7 @@ def test_infer_new_request_numpy(device): img = read_image() exec_net = ie.compile_model(func, device) res = exec_net.infer_new_request({"data": img}) - assert np.argmax(res) == 2 + assert np.argmax(res[list(res)[0]]) == 2 def test_infer_new_request_tensor_numpy_copy(device): @@ -242,8 +242,8 @@ def test_infer_new_request_tensor_numpy_copy(device): exec_net = ie.compile_model(func, device) res_tensor = exec_net.infer_new_request({"data": tensor}) res_img = exec_net.infer_new_request({"data": tensor}) - assert np.argmax(res_tensor) == 2 - assert np.argmax(res_tensor) == np.argmax(res_img) + assert np.argmax(res_tensor[list(res_tensor)[0]]) == 2 + assert np.argmax(res_tensor[list(res_tensor)[0]]) == np.argmax(res_img[list(res_img)[0]]) def test_infer_tensor_numpy_shared_memory(device): @@ -255,8 +255,8 @@ def test_infer_tensor_numpy_shared_memory(device): exec_net = ie.compile_model(func, device) res_tensor = exec_net.infer_new_request({"data": tensor}) res_img = exec_net.infer_new_request({"data": tensor}) - assert np.argmax(res_tensor) == 2 - assert np.argmax(res_tensor) == np.argmax(res_img) + assert np.argmax(res_tensor[list(res_tensor)[0]]) == 2 + assert np.argmax(res_tensor[list(res_tensor)[0]]) == 
np.argmax(res_img[list(res_img)[0]]) def test_infer_new_request_wrong_port_name(device): @@ -292,7 +292,7 @@ def test_infer_numpy_model_from_buffer(device): img = read_image() exec_net = core.compile_model(func, device) res = exec_net.infer_new_request({"data": img}) - assert np.argmax(res) == 2 + assert np.argmax(res[list(res)[0]]) == 2 def test_infer_tensor_model_from_buffer(device): @@ -306,4 +306,4 @@ def test_infer_tensor_model_from_buffer(device): tensor = Tensor(img) exec_net = core.compile_model(func, device) res = exec_net.infer_new_request({"data": tensor}) - assert np.argmax(res) == 2 + assert np.argmax(res[list(res)[0]]) == 2 diff --git a/src/bindings/python/tests/test_inference_engine/test_infer_request.py b/src/bindings/python/tests/test_inference_engine/test_infer_request.py index 9376e9f08b8..1fa7ad1ec14 100644 --- a/src/bindings/python/tests/test_inference_engine/test_infer_request.py +++ b/src/bindings/python/tests/test_inference_engine/test_infer_request.py @@ -61,9 +61,10 @@ def test_tensor_setter(device): assert np.allclose(tensor.data, t1.data, atol=1e-2, rtol=1e-2) res = request1.infer({0: tensor}) - res_1 = np.sort(res[0]) + k = list(res)[0] + res_1 = np.sort(res[k]) t2 = request1.get_tensor("fc_out") - assert np.allclose(t2.data, res[0].data, atol=1e-2, rtol=1e-2) + assert np.allclose(t2.data, res[k].data, atol=1e-2, rtol=1e-2) request = exec_net_2.create_infer_request() res = request.infer({"data": tensor}) @@ -187,7 +188,7 @@ def test_infer_mixed_keys(device): request = model.create_infer_request() res = request.infer({0: tensor2, "data": tensor}) - assert np.argmax(res) == 2 + assert np.argmax(res[list(res)[0]]) == 2 def test_infer_queue(device): @@ -304,11 +305,49 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): # reset initial state of ReadValue to zero mem_state.reset() res = request.infer({0: np.full(input_shape, 1, dtype=data_type)}) - # always ones expected_res = np.full(input_shape, 1, dtype=data_type) 
else: res = request.infer({0: np.full(input_shape, 1, dtype=data_type)}) expected_res = np.full(input_shape, i, dtype=data_type) - assert np.allclose(res[0], expected_res, atol=1e-6), \ + + assert np.allclose(res[list(res)[0]], expected_res, atol=1e-6), \ "Expected values: {} \n Actual values: {} \n".format(expected_res, res) + + +def test_get_results(device): + core = Core() + func = core.read_model(test_net_xml, test_net_bin) + core.set_config({"PERF_COUNT": "YES"}, device) + exec_net = core.compile_model(func, device) + img = read_image() + request = exec_net.create_infer_request() + outputs = request.infer({0: img}) + assert np.allclose(list(outputs.values()), list(request.results.values())) + + +def test_results_async_infer(device): + jobs = 8 + num_request = 4 + core = Core() + func = core.read_model(test_net_xml, test_net_bin) + exec_net = core.compile_model(func, device) + infer_queue = AsyncInferQueue(exec_net, num_request) + jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)] + + def callback(request, job_id): + jobs_done[job_id]["finished"] = True + jobs_done[job_id]["latency"] = request.latency + + img = read_image() + infer_queue.set_callback(callback) + assert infer_queue.is_ready + for i in range(jobs): + infer_queue.start_async({"data": img}, i) + infer_queue.wait_all() + + request = exec_net.create_infer_request() + outputs = request.infer({0: img}) + + for i in range(num_request): + np.allclose(list(outputs.values()), list(infer_queue[i].results.values()))