[IE PYTHON] fix gil (#8068)

* remove nogil

* Add test
This commit is contained in:
Alexey Lebedev 2021-10-21 12:06:41 +03:00 committed by GitHub
parent 0793290762
commit d7fbd6f7ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 32 deletions

View File

@ -544,9 +544,7 @@ cdef class IECore:
# If there are more than one device of a specific type, they all are listed followed by a dot and a number. # If there are more than one device of a specific type, they all are listed followed by a dot and a number.
@property @property
def available_devices(self): def available_devices(self):
cdef vector[string] c_devices cdef vector[string] c_devices = self.impl.getAvailableDevices()
with nogil:
c_devices = self.impl.getAvailableDevices()
return [d.decode() for d in c_devices] return [d.decode() for d in c_devices]
## This structure stores info about pre-processing of network inputs (scale, mean image, ...) ## This structure stores info about pre-processing of network inputs (scale, mean image, ...)
@ -925,14 +923,11 @@ cdef class ExecutableNetwork:
## A tuple of `InferRequest` instances ## A tuple of `InferRequest` instances
@property @property
def requests(self): def requests(self):
cdef size_t c_infer_requests_size cdef size_t c_infer_requests_size = deref(self.impl).infer_requests.size()
with nogil:
c_infer_requests_size = deref(self.impl).infer_requests.size()
if len(self._infer_requests) == 0: if len(self._infer_requests) == 0:
for i in range(c_infer_requests_size): for i in range(c_infer_requests_size):
infer_request = InferRequest() infer_request = InferRequest()
with nogil: infer_request.impl = &(deref(self.impl).infer_requests[i])
infer_request.impl = &(deref(self.impl).infer_requests[i])
infer_request._inputs_list = list(self.input_info.keys()) infer_request._inputs_list = list(self.input_info.keys())
infer_request._outputs_list = list(self.outputs.keys()) infer_request._outputs_list = list(self.outputs.keys())
for input_name in infer_request._inputs_list: for input_name in infer_request._inputs_list:
@ -1052,10 +1047,7 @@ cdef class ExecutableNetwork:
## Get idle request ID ## Get idle request ID
# @return Request index # @return Request index
cpdef get_idle_request_id(self): cpdef get_idle_request_id(self):
cdef int request_id return deref(self.impl).getIdleRequestId()
with nogil:
request_id = deref(self.impl).getIdleRequestId()
return request_id
ctypedef extern void (*cb_type)(void*, int) with gil ctypedef extern void (*cb_type)(void*, int) with gil
@ -1197,8 +1189,7 @@ cdef class InferRequest:
cpdef infer(self, inputs=None): cpdef infer(self, inputs=None):
if inputs is not None: if inputs is not None:
self._fill_inputs(inputs) self._fill_inputs(inputs)
with nogil: deref(self.impl).infer()
deref(self.impl).infer()
## Starts asynchronous inference of the infer request and fill outputs array ## Starts asynchronous inference of the infer request and fill outputs array
# #
@ -1215,8 +1206,7 @@ cdef class InferRequest:
cpdef async_infer(self, inputs=None): cpdef async_infer(self, inputs=None):
if inputs is not None: if inputs is not None:
self._fill_inputs(inputs) self._fill_inputs(inputs)
with nogil: deref(self.impl).infer_async()
deref(self.impl).infer_async()
## Waits for the result to become available. Blocks until specified timeout elapses or the result ## Waits for the result to become available. Blocks until specified timeout elapses or the result
# becomes available, whichever comes first. # becomes available, whichever comes first.
@ -1337,8 +1327,7 @@ cdef class IENetwork:
def __cinit__(self, model = None): def __cinit__(self, model = None):
# Try to create Inference Engine network from capsule # Try to create Inference Engine network from capsule
if model is not None: if model is not None:
with nogil: self.impl = C.IENetwork(model)
self.impl = C.IENetwork(model)
else: else:
with nogil: with nogil:
self.impl = C.IENetwork() self.impl = C.IENetwork()
@ -1352,9 +1341,7 @@ cdef class IENetwork:
## A dictionary that maps input layer names to InputInfoPtr objects. ## A dictionary that maps input layer names to InputInfoPtr objects.
@property @property
def input_info(self): def input_info(self):
cdef map[string, C.InputInfo.Ptr] c_inputs cdef map[string, C.InputInfo.Ptr] c_inputs = self.impl.getInputsInfo()
with nogil:
c_inputs = self.impl.getInputsInfo()
inputs = {} inputs = {}
cdef InputInfoPtr input_info_ptr cdef InputInfoPtr input_info_ptr
for input in c_inputs: for input in c_inputs:
@ -1367,9 +1354,7 @@ cdef class IENetwork:
## A dictionary that maps output layer names to DataPtr objects ## A dictionary that maps output layer names to DataPtr objects
@property @property
def outputs(self): def outputs(self):
cdef map[string, C.DataPtr] c_outputs cdef map[string, C.DataPtr] c_outputs = self.impl.getOutputs()
with nogil:
c_outputs = self.impl.getOutputs()
outputs = {} outputs = {}
cdef DataPtr data_ptr cdef DataPtr data_ptr
for output in c_outputs: for output in c_outputs:

View File

@ -162,18 +162,18 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
object getMetric(const string & metric_name) except + object getMetric(const string & metric_name) except +
object getConfig(const string & metric_name) except + object getConfig(const string & metric_name) except +
int wait(int num_requests, int64_t timeout) nogil int wait(int num_requests, int64_t timeout) nogil
int getIdleRequestId() nogil int getIdleRequestId()
shared_ptr[CExecutableNetwork] getPluginLink() except + shared_ptr[CExecutableNetwork] getPluginLink() except +
cdef cppclass IENetwork: cdef cppclass IENetwork:
IENetwork() nogil except + IENetwork() nogil except +
IENetwork(object) nogil except + IENetwork(object) except +
string name string name
size_t batch_size size_t batch_size
string precision string precision
map[string, vector[size_t]] inputs map[string, vector[size_t]] inputs
const map[string, InputInfo.Ptr] getInputsInfo() nogil except + const map[string, InputInfo.Ptr] getInputsInfo() except +
map[string, DataPtr] getOutputs() nogil except + map[string, DataPtr] getOutputs() except +
void addOutput(string &, size_t) except + void addOutput(string &, size_t) except +
void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except + void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except +
void setBatch(size_t size) except + void setBatch(size_t size) except +
@ -193,8 +193,8 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
void setBlob(const string &blob_name, const CBlob.Ptr &blob_ptr, CPreProcessInfo& info) except + void setBlob(const string &blob_name, const CBlob.Ptr &blob_ptr, CPreProcessInfo& info) except +
const CPreProcessInfo& getPreProcess(const string& blob_name) except + const CPreProcessInfo& getPreProcess(const string& blob_name) except +
map[string, ProfileInfo] getPerformanceCounts() except + map[string, ProfileInfo] getPerformanceCounts() except +
void infer() nogil except + void infer() except +
void infer_async() nogil except + void infer_async() except +
int wait(int64_t timeout) nogil except + int wait(int64_t timeout) nogil except +
void setBatch(int size) except + void setBatch(int size) except +
void setCyCallback(void (*)(void*, int), void *) except + void setCyCallback(void (*)(void*, int), void *) except +
@ -219,7 +219,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
void unregisterPlugin(const string & deviceName) except + void unregisterPlugin(const string & deviceName) except +
void registerPlugins(const string & xmlConfigFile) except + void registerPlugins(const string & xmlConfigFile) except +
void addExtension(const string & ext_lib_path, const string & deviceName) except + void addExtension(const string & ext_lib_path, const string & deviceName) except +
vector[string] getAvailableDevices() nogil except + vector[string] getAvailableDevices() except +
object getMetric(const string & deviceName, const string & name) except + object getMetric(const string & deviceName, const string & name) except +
object getConfig(const string & deviceName, const string & name) except + object getConfig(const string & deviceName, const string & name) except +

View File

@ -5,7 +5,7 @@ import os
import pytest import pytest
from sys import platform from sys import platform
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Event, Thread
from time import sleep, time from time import sleep, time
from queue import Queue from queue import Queue
@ -274,3 +274,52 @@ def test_load_network_release_gil(device):
# Assert there were never any long gil locks # Assert there were never any long gil locks
assert message_queue.qsize() == 0, \ assert message_queue.qsize() == 0, \
f"More than 0 GIL locks occured! Latency: {message_queue.get()})" f"More than 0 GIL locks occured! Latency: {message_queue.get()})"
def test_nogil_safe(device):
    """Run IECore/IENetwork calls on a worker thread while the main thread is
    inside another IECore call.

    For every (main call, worker call) pair a worker thread is started that
    waits on an ``Event``. The main thread sets the event, performs its call
    and then asserts that the worker has already cleared the event, i.e. the
    worker was scheduled before the main call returned. The worker then runs
    its own call; the main thread joins it before moving to the next pair.

    The cases are kept as sequences of ``(callable, args)`` pairs rather than
    dicts keyed by the callable: a dict literal keeps only the last value for
    a repeated key, so several ``getattr`` cases in one dict would collapse
    into a single case.

    :param device: device name passed to the IECore calls under test
                   (presumably supplied by a pytest fixture).
    """
    call_thread_func = Event()
    core = IECore()
    net = core.read_network(model=test_net_xml, weights=test_net_bin)

    def thread_target(thread_func, thread_args):
        # Wait for the main thread's signal, acknowledge it by clearing the
        # event, then perform the worker-side call.
        call_thread_func.wait()
        call_thread_func.clear()
        thread_func(*thread_args)

    def main_thread_target(gil_release_func, args):
        call_thread_func.set()
        gil_release_func(*args)
        # The worker must have woken up and cleared the event while the call
        # above was in progress.
        assert not call_thread_func.is_set()

    def run_parallel(gil_release_func, args, thread_func, thread_args):
        thread = Thread(target=thread_target, args=[thread_func, thread_args])
        thread.start()
        main_thread_target(gil_release_func, args)
        thread.join()

    # Calls made on the main thread; group i is paired with thread_targets[i].
    main_targets = (
        (
            (core.read_network, [test_net_xml, test_net_bin]),
            (core.load_network, [net, device]),
        ),
        (
            (core.load_network, [net, device]),
        ),
    )

    # Calls made on the worker thread while the main-thread call is running.
    thread_targets = (
        (
            (core.get_versions, [device]),
            (core.read_network, [test_net_xml, test_net_bin]),
            (core.load_network, [net, device]),
            (core.query_network, [net, device]),
            (getattr, [core, "available_devices"]),
        ),
        (
            (getattr, [net, "name"]),
            (getattr, [net, "input_info"]),
            (getattr, [net, "outputs"]),
            (getattr, [net, "batch_size"]),
        ),
    )

    for main_group, thread_group in zip(main_targets, thread_targets):
        for nogil_func, args in main_group:
            for thread_func, thread_args in thread_group:
                run_parallel(nogil_func, args, thread_func, thread_args)