include api changes (#8837)
Summary of changes:
- include API changes (also for executable_network)
- update infer methods in ie_api to return a dict (py::dict) instead of a list; add return types and `auto` in infer loops
- update runtime and inference-engine tests to work with infer returning a dict
- remove the InferMap/InferVec containers and unused imports
- add helper function outputs_to_dict (const outputs, reference parameters) with f16/bf16 support via view(int16)
- change get_results method to a `results` property; add tests for the results attribute for sync and async infer
- style fixes; update the way keys are received from the dict in test_core; remove out-of-date comment; fix typo
This commit is contained in:
committed by
GitHub
parent
8faa08da12
commit
b65a14d587
@@ -44,13 +44,10 @@ def get_input_types(obj: Union[InferRequestBase, ExecutableNetworkBase]) -> dict
|
||||
class InferRequest(InferRequestBase):
|
||||
"""InferRequest wrapper."""
|
||||
|
||||
def infer(self, inputs: dict = None) -> List[np.ndarray]:
|
||||
def infer(self, inputs: dict = None) -> dict:
|
||||
"""Infer wrapper for InferRequest."""
|
||||
inputs = {} if inputs is None else normalize_inputs(inputs, get_input_types(self))
|
||||
res = super().infer(inputs)
|
||||
# Required to return list since np.ndarray forces all of tensors data to match in
|
||||
# dimensions. This results in errors when running ops like variadic split.
|
||||
return [copy.deepcopy(tensor.data) for tensor in res]
|
||||
return super().infer(inputs)
|
||||
|
||||
def start_async(self, inputs: dict = None, userdata: Any = None) -> None:
|
||||
"""Asynchronous infer wrapper for InferRequest."""
|
||||
@@ -65,13 +62,10 @@ class ExecutableNetwork(ExecutableNetworkBase):
|
||||
"""Create new InferRequest object."""
|
||||
return InferRequest(super().create_infer_request())
|
||||
|
||||
def infer_new_request(self, inputs: dict = None) -> List[np.ndarray]:
|
||||
def infer_new_request(self, inputs: dict = None) -> dict:
|
||||
"""Infer wrapper for ExecutableNetwork."""
|
||||
inputs = {} if inputs is None else normalize_inputs(inputs, get_input_types(self))
|
||||
res = super().infer_new_request(inputs)
|
||||
# Required to return list since np.ndarray forces all of tensors data to match in
|
||||
# dimensions. This results in errors when running ops like variadic split.
|
||||
return [copy.deepcopy(tensor.data) for tensor in res]
|
||||
return super().infer_new_request(inputs)
|
||||
|
||||
|
||||
class AsyncInferQueue(AsyncInferQueueBase):
|
||||
|
||||
@@ -296,4 +296,83 @@ uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& ac
|
||||
}
|
||||
}
|
||||
|
||||
// Copy every output tensor of `request` into a NumPy array and collect the
// arrays in a Python dict keyed by the corresponding output port.
//
// @param outputs  Output ports whose tensors are fetched from the request.
// @param request  Infer request holding the already-computed output tensors.
// @return py::dict mapping py::cast(output-port) -> numpy.ndarray carrying the
//         tensor's shape and a copy of its data. bf16/f16 have no native NumPy
//         dtype, so those buffers are exposed bit-wise as int16 views.
py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs, ov::runtime::InferRequest& request) {
    py::dict res;
    for (const auto& out : outputs) {
        ov::runtime::Tensor t{request.get_tensor(out)};
        // Note: py::array(shape, ptr) copies the buffer, so the returned dict
        // owns its data independently of the request's tensors.
        switch (t.get_element_type()) {
        case ov::element::Type_t::i8: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<int8_t>());
            break;
        }
        case ov::element::Type_t::i16: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<int16_t>());
            break;
        }
        case ov::element::Type_t::i32: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<int32_t>());
            break;
        }
        case ov::element::Type_t::i64: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<int64_t>());
            break;
        }
        case ov::element::Type_t::u8: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<uint8_t>());
            break;
        }
        case ov::element::Type_t::u16: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<uint16_t>());
            break;
        }
        case ov::element::Type_t::u32: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<uint32_t>());
            break;
        }
        case ov::element::Type_t::u64: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<uint64_t>());
            break;
        }
        case ov::element::Type_t::bf16: {
            // No NumPy bfloat16 dtype: reinterpret the 16-bit payload as int16.
            py::array arr(t.get_shape(), t.data<ov::bfloat16>());
            res[py::cast(out)] = arr.view("int16");
            break;
        }
        case ov::element::Type_t::f16: {
            // No universal NumPy float16 mapping here: expose raw bits as int16.
            py::array arr(t.get_shape(), t.data<ov::float16>());
            res[py::cast(out)] = arr.view("int16");
            break;
        }
        case ov::element::Type_t::f32: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<float>());
            break;
        }
        case ov::element::Type_t::f64: {
            res[py::cast(out)] = py::array(t.get_shape(), t.data<double>());
            break;
        }
        case ov::element::Type_t::boolean: {
            // Bug fix: was t.data<bool*>(), which reinterpreted the boolean
            // buffer as an array of pointers. The element type is bool.
            res[py::cast(out)] = py::array(t.get_shape(), t.data<bool>());
            break;
        }
        default: {
            // NOTE(review): unsupported element types are silently omitted from
            // the result dict — consider raising instead; confirm intended.
            break;
        }
        }
    }
    return res;
}
|
||||
|
||||
}; // namespace Common
|
||||
|
||||
@@ -44,6 +44,8 @@ namespace Common
|
||||
|
||||
uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual);
|
||||
|
||||
py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs, ov::runtime::InferRequest& request);
|
||||
|
||||
// Use only with classes that are not creatable by users on Python's side, because
|
||||
// Objects created in Python that are wrapped with such wrapper will cause memory leaks.
|
||||
template <typename T>
|
||||
|
||||
@@ -17,7 +17,6 @@ namespace py = pybind11;
|
||||
namespace Containers {
|
||||
using TensorIndexMap = std::map<size_t, ov::runtime::Tensor>;
|
||||
using TensorNameMap = std::map<std::string, ov::runtime::Tensor>;
|
||||
using InferResults = std::vector<ov::runtime::Tensor>;
|
||||
|
||||
void regclass_TensorIndexMap(py::module m);
|
||||
void regclass_TensorNameMap(py::module m);
|
||||
|
||||
@@ -36,12 +36,7 @@ void regclass_ExecutableNetwork(py::module m) {
|
||||
// Update inputs if there are any
|
||||
Common::set_request_tensors(request, inputs);
|
||||
request.infer();
|
||||
|
||||
Containers::InferResults results;
|
||||
for (const auto out : self.outputs()) {
|
||||
results.push_back(request.get_tensor(out));
|
||||
}
|
||||
return results;
|
||||
return Common::outputs_to_dict(self.outputs(), request);
|
||||
},
|
||||
py::arg("inputs"));
|
||||
|
||||
|
||||
@@ -65,11 +65,8 @@ void regclass_InferRequest(py::module m) {
|
||||
self._start_time = Time::now();
|
||||
self._request.infer();
|
||||
self._end_time = Time::now();
|
||||
Containers::InferResults results;
|
||||
for (auto& out : self._outputs) {
|
||||
results.push_back(self._request.get_tensor(out));
|
||||
}
|
||||
return results;
|
||||
|
||||
return Common::outputs_to_dict(self._outputs, self._request);
|
||||
},
|
||||
py::arg("inputs"));
|
||||
|
||||
@@ -271,4 +268,8 @@ void regclass_InferRequest(py::module m) {
|
||||
cls.def_property_readonly("profiling_info", [](InferRequestWrapper& self) {
|
||||
return self._request.get_profiling_info();
|
||||
});
|
||||
|
||||
cls.def_property_readonly("results", [](InferRequestWrapper& self) {
|
||||
return Common::outputs_to_dict(self._outputs, self._request);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -83,12 +83,13 @@ class Computation(object):
|
||||
def convert_buffers(self, source_buffers, target_dtypes):
    """Convert raw output buffers to the dtypes expected by the caller.

    :param source_buffers: mapping of output keys to numpy buffers; its
        iteration order is assumed to align positionally with
        ``self.results`` and ``target_dtypes`` -- TODO confirm.
    :param target_dtypes: target numpy dtype for each output, positionally
        aligned with the model's result nodes.
    :return: list of converted numpy arrays, one per output.
    """
    # Hoist key extraction out of the loop: the original rebuilt
    # list(source_buffers) on every iteration, which is O(n^2).
    keys = list(source_buffers)
    converted_buffers = []
    for i, key in enumerate(keys):
        target_dtype = target_dtypes[i]
        # custom conversion for bf16: numpy has no bfloat16 dtype, so
        # reinterpret the 32-bit pattern and keep the upper 16 bits.
        if self.results[i].get_output_element_type(0) == Type.bf16:
            converted_buffers.append((source_buffers[key].view(np.uint32) >> 16).astype(np.uint16))
        else:
            converted_buffers.append(source_buffers[key].astype(target_dtype))
    return converted_buffers
|
||||
|
||||
def __call__(self, *input_values: NumericData) -> List[NumericData]:
|
||||
|
||||
@@ -24,7 +24,7 @@ def test_compact_api_xml():
|
||||
model = compile_model(test_net_xml)
|
||||
assert(isinstance(model, ExecutableNetwork))
|
||||
results = model.infer_new_request({"data": img})
|
||||
assert np.argmax(results) == 2
|
||||
assert np.argmax(results[list(results)[0]]) == 2
|
||||
|
||||
|
||||
def test_compact_api_onnx():
|
||||
@@ -33,7 +33,7 @@ def test_compact_api_onnx():
|
||||
model = compile_model(test_net_onnx)
|
||||
assert(isinstance(model, ExecutableNetwork))
|
||||
results = model.infer_new_request({"data": img})
|
||||
assert np.argmax(results) == 2
|
||||
assert np.argmax(results[list(results)[0]]) == 2
|
||||
|
||||
|
||||
def test_core_class():
|
||||
@@ -53,8 +53,7 @@ def test_core_class():
|
||||
|
||||
input_tensor = Tensor(input_data)
|
||||
results = request.infer({"parameter": input_tensor})
|
||||
|
||||
assert np.allclose(results, expected_output)
|
||||
assert np.allclose(results[list(results)[0]], expected_output)
|
||||
|
||||
|
||||
def test_compile_model(device):
|
||||
|
||||
@@ -231,7 +231,7 @@ def test_infer_new_request_numpy(device):
|
||||
img = read_image()
|
||||
exec_net = ie.compile_model(func, device)
|
||||
res = exec_net.infer_new_request({"data": img})
|
||||
assert np.argmax(res) == 2
|
||||
assert np.argmax(res[list(res)[0]]) == 2
|
||||
|
||||
|
||||
def test_infer_new_request_tensor_numpy_copy(device):
|
||||
@@ -242,8 +242,8 @@ def test_infer_new_request_tensor_numpy_copy(device):
|
||||
exec_net = ie.compile_model(func, device)
|
||||
res_tensor = exec_net.infer_new_request({"data": tensor})
|
||||
res_img = exec_net.infer_new_request({"data": tensor})
|
||||
assert np.argmax(res_tensor) == 2
|
||||
assert np.argmax(res_tensor) == np.argmax(res_img)
|
||||
assert np.argmax(res_tensor[list(res_tensor)[0]]) == 2
|
||||
assert np.argmax(res_tensor[list(res_tensor)[0]]) == np.argmax(res_img[list(res_img)[0]])
|
||||
|
||||
|
||||
def test_infer_tensor_numpy_shared_memory(device):
|
||||
@@ -255,8 +255,8 @@ def test_infer_tensor_numpy_shared_memory(device):
|
||||
exec_net = ie.compile_model(func, device)
|
||||
res_tensor = exec_net.infer_new_request({"data": tensor})
|
||||
res_img = exec_net.infer_new_request({"data": tensor})
|
||||
assert np.argmax(res_tensor) == 2
|
||||
assert np.argmax(res_tensor) == np.argmax(res_img)
|
||||
assert np.argmax(res_tensor[list(res_tensor)[0]]) == 2
|
||||
assert np.argmax(res_tensor[list(res_tensor)[0]]) == np.argmax(res_img[list(res_img)[0]])
|
||||
|
||||
|
||||
def test_infer_new_request_wrong_port_name(device):
|
||||
@@ -292,7 +292,7 @@ def test_infer_numpy_model_from_buffer(device):
|
||||
img = read_image()
|
||||
exec_net = core.compile_model(func, device)
|
||||
res = exec_net.infer_new_request({"data": img})
|
||||
assert np.argmax(res) == 2
|
||||
assert np.argmax(res[list(res)[0]]) == 2
|
||||
|
||||
|
||||
def test_infer_tensor_model_from_buffer(device):
|
||||
@@ -306,4 +306,4 @@ def test_infer_tensor_model_from_buffer(device):
|
||||
tensor = Tensor(img)
|
||||
exec_net = core.compile_model(func, device)
|
||||
res = exec_net.infer_new_request({"data": tensor})
|
||||
assert np.argmax(res) == 2
|
||||
assert np.argmax(res[list(res)[0]]) == 2
|
||||
|
||||
@@ -61,9 +61,10 @@ def test_tensor_setter(device):
|
||||
assert np.allclose(tensor.data, t1.data, atol=1e-2, rtol=1e-2)
|
||||
|
||||
res = request1.infer({0: tensor})
|
||||
res_1 = np.sort(res[0])
|
||||
k = list(res)[0]
|
||||
res_1 = np.sort(res[k])
|
||||
t2 = request1.get_tensor("fc_out")
|
||||
assert np.allclose(t2.data, res[0].data, atol=1e-2, rtol=1e-2)
|
||||
assert np.allclose(t2.data, res[k].data, atol=1e-2, rtol=1e-2)
|
||||
|
||||
request = exec_net_2.create_infer_request()
|
||||
res = request.infer({"data": tensor})
|
||||
@@ -187,7 +188,7 @@ def test_infer_mixed_keys(device):
|
||||
|
||||
request = model.create_infer_request()
|
||||
res = request.infer({0: tensor2, "data": tensor})
|
||||
assert np.argmax(res) == 2
|
||||
assert np.argmax(res[list(res)[0]]) == 2
|
||||
|
||||
|
||||
def test_infer_queue(device):
|
||||
@@ -304,11 +305,49 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode):
|
||||
# reset initial state of ReadValue to zero
|
||||
mem_state.reset()
|
||||
res = request.infer({0: np.full(input_shape, 1, dtype=data_type)})
|
||||
|
||||
# always ones
|
||||
expected_res = np.full(input_shape, 1, dtype=data_type)
|
||||
else:
|
||||
res = request.infer({0: np.full(input_shape, 1, dtype=data_type)})
|
||||
expected_res = np.full(input_shape, i, dtype=data_type)
|
||||
assert np.allclose(res[0], expected_res, atol=1e-6), \
|
||||
|
||||
assert np.allclose(res[list(res)[0]], expected_res, atol=1e-6), \
|
||||
"Expected values: {} \n Actual values: {} \n".format(expected_res, res)
|
||||
|
||||
|
||||
def test_get_results(device):
    """The `results` property must mirror the dict returned by a sync infer()."""
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    core.set_config({"PERF_COUNT": "YES"}, device)
    compiled = core.compile_model(model, device)
    image = read_image()
    request = compiled.create_infer_request()
    inferred = request.infer({0: image})
    assert np.allclose(list(inferred.values()), list(request.results.values()))
|
||||
|
||||
|
||||
def test_results_async_infer(device):
    """Each queued request's `results` must match a reference sync inference."""
    jobs = 8
    num_request = 4
    core = Core()
    func = core.read_model(test_net_xml, test_net_bin)
    exec_net = core.compile_model(func, device)
    infer_queue = AsyncInferQueue(exec_net, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    # Callback marks the job finished and records its latency.
    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()

    # Reference result from a plain synchronous inference on the same input.
    request = exec_net.create_infer_request()
    outputs = request.infer({0: img})

    for i in range(num_request):
        # Bug fix: np.allclose() returns a bool that was previously discarded,
        # so this check could never fail. Assert it.
        assert np.allclose(list(outputs.values()), list(infer_queue[i].results.values()))
|
||||
|
||||
Reference in New Issue
Block a user