Faster JSON encoder/decoder

Improve performance of FreeIPA's JSON serializer and deserializer.

* Don't indent and sort keys. Both options trigger a slow path in
  Python's json package. Without indentation and sorting, encoding
  mostly happens in optimized C code.
* Replace O(n) type checks with O(1) type lookup and eliminate
  the use of isinstance().
* Check each client capability at most once per conversion.
* Use the decoder's object_hook feature to traverse the object tree once
  and to eliminate calls to isinstance() (a rough sketch of the approach
  follows this list).
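
A rough, standalone sketch of the dispatch-table and object_hook approach
described above (Python 3 only; the names Encoder and obj_hook are invented
for illustration, and this is not the FreeIPA implementation, which also
caches client-capability checks and handles DN, DNSName, Principal, Decimal
and datetime values):

import base64
import json


class Encoder(dict):
    """Maps a value's exact class to its encoder callable for O(1) dispatch."""

    _identity = object()  # sentinel: value needs no conversion

    def __init__(self):
        super(Encoder, self).__init__()
        self.update({
            str: self._identity,
            int: self._identity,
            float: self._identity,
            bool: self._identity,
            type(None): self._identity,
            bytes: self._enc_bytes,
            list: self._enc_list,
            tuple: self._enc_list,
            dict: self._enc_dict,
        })

    def __missing__(self, typ):
        # Unknown subclass: walk the MRO once and cache the result so the
        # next lookup for this type is a plain dict hit.
        for base in typ.__mro__:
            if base in self:
                self[typ] = self[base]
                return self[base]
        raise TypeError(typ)

    def convert(self, obj):
        func = self[obj.__class__]
        return obj if func is self._identity else func(obj)

    def _enc_bytes(self, val):
        return {'__base64__': base64.b64encode(val).decode('ascii')}

    def _enc_list(self, val):
        return [self.convert(v) for v in val]

    def _enc_dict(self, val):
        return {k: self.convert(v) for k, v in val.items()}


def obj_hook(dct):
    # json.loads() calls this for every decoded dict, so the tree is
    # traversed exactly once and no isinstance() checks are needed.
    if '__base64__' in dct:
        return base64.b64decode(dct['__base64__'])
    return dct


# No sort_keys/indent: json.dumps() stays on its optimized C encoder.
wire = json.dumps(Encoder().convert({'blob': b'\x00\x01', 'n': 42}))
assert json.loads(wire, object_hook=obj_hook) == {'blob': b'\x00\x01', 'n': 42}

Because every value class maps straight to its encoder, nested structures are
converted without any isinstance() calls, and json.dumps()/json.loads() each
walk the tree exactly once.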

Closes: https://fedorahosted.org/freeipa/ticket/6655
Signed-off-by: Christian Heimes <cheimes@redhat.com>
Reviewed-By: Martin Basti <mbasti@redhat.com>
Reviewed-By: Jan Cholasta <jcholast@redhat.com>
Authored by Christian Heimes on 2017-02-13 09:46:39 +01:00; committed by Martin Basti
parent 593ea7da9a
commit 8159c2883b
2 changed files with 134 additions and 83 deletions

ipalib/rpc.py

@@ -51,7 +51,7 @@ from six.moves import urllib
 from ipalib.backend import Connectible
 from ipalib.constants import LDAP_GENERALIZED_TIME_FORMAT
 from ipalib.errors import (public_errors, UnknownError, NetworkError,
-                           KerberosError, XMLRPCMarshallError, JSONError, ConversionError)
+                           KerberosError, XMLRPCMarshallError, JSONError)
 from ipalib import errors, capabilities
 from ipalib.request import context, Connection
 from ipapython.ipa_log_manager import root_logger
@@ -274,67 +274,140 @@ def xml_dumps(params, version, methodname=None, methodresponse=False,
     )
 
 
-def json_encode_binary(val, version):
-    '''
-    JSON cannot encode binary values. We encode binary values in Python str
-    objects and text in Python unicode objects. In order to allow a binary
-    object to be passed through JSON we base64 encode it thus converting it to
-    text which JSON can transport. To assure we recognize the value is a base64
-    encoded representation of the original binary value and not confuse it with
-    other text we convert the binary value to a dict in this form:
-
-    {'__base64__' : base64_encoding_of_binary_value}
-
-    This modification of the original input value cannot be done "in place" as
-    one might first assume (e.g. replacing any binary items in a container
-    (e.g. list, tuple, dict) with the base64 dict because the container might be
-    an immutable object (i.e. a tuple). Therefore this function returns a copy
-    of any container objects it encounters with tuples replaced by lists. This
-    is O.K. because the JSON encoding will map both lists and tuples to JSON
-    arrays.
-    '''
-    if isinstance(val, dict):
-        new_dict = {}
-        for k, v in val.items():
-            new_dict[k] = json_encode_binary(v, version)
-        return new_dict
-    elif isinstance(val, (list, tuple)):
-        new_list = [json_encode_binary(v, version) for v in val]
-        return new_list
-    elif isinstance(val, bytes):
-        encoded = base64.b64encode(val)
-        if not six.PY2:
-            encoded = encoded.decode('ascii')
-        return {'__base64__': encoded}
-    elif isinstance(val, Decimal):
-        return unicode(val)
-    elif isinstance(val, DN):
-        return str(val)
-    elif isinstance(val, datetime.datetime):
-        if capabilities.client_has_capability(version, 'datetime_values'):
-            return {'__datetime__': val.strftime(LDAP_GENERALIZED_TIME_FORMAT)}
-        else:
-            return val.strftime(LDAP_GENERALIZED_TIME_FORMAT)
-    elif isinstance(val, DNSName):
-        if capabilities.client_has_capability(version, 'dns_name_values'):
-            return {'__dns_name__': unicode(val)}
-        else:
-            return unicode(val)
-    elif isinstance(val, Principal):
-        return unicode(val)
-    else:
-        return val
+class _JSONConverter(dict):
+    __slots__ = ('version', '_cap_datetime', '_cap_dnsname')
+
+    _identity = object()
+
+    def __init__(self, version, _identity=_identity):
+        super(_JSONConverter, self).__init__()
+        self.version = version
+        self._cap_datetime = None
+        self._cap_dnsname = None
+        self.update({
+            unicode: _identity,
+            bool: _identity,
+            type(None): _identity,
+            float: _identity,
+            Decimal: unicode,
+            DN: str,
+            Principal: unicode,
+            DNSName: self._enc_dnsname,
+            datetime.datetime: self._enc_datetime,
+            bytes: self._enc_bytes,
+            list: self._enc_list,
+            tuple: self._enc_list,
+            dict: self._enc_dict,
+        })
+        # int, long
+        for t in six.integer_types:
+            self[t] = _identity
+
+    def __missing__(self, typ):
+        # walk MRO to find best match
+        for c in typ.__mro__:
+            if c in self:
+                self[typ] = self[c]
+                return self[c]
+        # use issubclass to check for registered ABCs
+        for c in self:
+            if issubclass(typ, c):
+                self[typ] = self[c]
+                return self[c]
+        raise TypeError(typ)
+
+    def convert(self, obj, _identity=_identity):
+        # obj.__class__ is twice as fast as type(obj)
+        func = self[obj.__class__]
+        return obj if func is _identity else func(obj)
+
+    def _enc_datetime(self, val):
+        cap = self._cap_datetime
+        if cap is None:
+            cap = capabilities.client_has_capability(self.version,
+                                                     'datetime_values')
+            self._cap_datetime = cap
+        if cap:
+            return {'__datetime__': val.strftime(LDAP_GENERALIZED_TIME_FORMAT)}
+        else:
+            return val.strftime(LDAP_GENERALIZED_TIME_FORMAT)
+
+    def _enc_dnsname(self, val):
+        cap = self._cap_dnsname
+        if cap is None:
+            cap = capabilities.client_has_capability(self.version,
+                                                     'dns_name_values')
+            self._cap_dnsname = cap
+        if cap:
+            return {'__dns_name__': unicode(val)}
+        else:
+            return unicode(val)
+
+    def _enc_bytes(self, val):
+        encoded = base64.b64encode(val)
+        if not six.PY2:
+            encoded = encoded.decode('ascii')
+        return {'__base64__': encoded}
+
+    def _enc_list(self, val, _identity=_identity):
+        result = []
+        append = result.append
+        for v in val:
+            func = self[v.__class__]
+            append(v if func is _identity else func(v))
+        return result
+
+    def _enc_dict(self, val, _identity=_identity, _iteritems=six.iteritems):
+        result = {}
+        for k, v in _iteritems(val):
+            func = self[v.__class__]
+            result[k] = v if func is _identity else func(v)
+        return result
+
+
+def json_encode_binary(val, version):
+    """
+    JSON cannot encode binary values. We encode binary values in Python str
+    objects and text in Python unicode objects. In order to allow a binary
+    object to be passed through JSON we base64 encode it thus converting it to
+    text which JSON can transport. To assure we recognize the value is a base64
+    encoded representation of the original binary value and not confuse it with
+    other text we convert the binary value to a dict in this form:
+
+    {'__base64__' : base64_encoding_of_binary_value}
+
+    This modification of the original input value cannot be done "in place" as
+    one might first assume (e.g. replacing any binary items in a container
+    (e.g. list, tuple, dict) with the base64 dict because the container might
+    be an immutable object (i.e. a tuple). Therefore this function returns a
+    copy of any container objects it encounters with tuples replaced by lists.
+    This is O.K. because the JSON encoding will map both lists and tuples to
+    JSON arrays.
+    """
+    result = _JSONConverter(version).convert(val)
+    return json.dumps(result)
+
+
+def _ipa_obj_hook(dct):
+    if '__base64__' in dct:
+        return base64.b64decode(dct['__base64__'])
+    elif '__datetime__' in dct:
+        return datetime.datetime.strptime(dct['__datetime__'],
+                                          LDAP_GENERALIZED_TIME_FORMAT)
+    elif '__dns_name__' in dct:
+        return DNSName(dct['__dns_name__'])
+    else:
+        return dct
 
 
 def json_decode_binary(val):
-    '''
+    """
     JSON cannot transport binary data. In order to transport binary data we
     convert binary data to a form like this:
 
     {'__base64__' : base64_encoding_of_binary_value}
 
     see json_encode_binary()
 
     After JSON had decoded the JSON stream back into a Python object we must
     recursively scan the object looking for any dicts which might represent
@@ -345,31 +418,11 @@ def json_decode_binary(val):
     don't modify objects in place because of side effects which may be
     dangerous. Thus we elect to spend a few more cycles and avoid the
     possibility of unintended side effects in favor of robustness.
-    '''
-    if isinstance(val, dict):
-        if '__base64__' in val:
-            return base64.b64decode(val['__base64__'])
-        elif '__datetime__' in val:
-            return datetime.datetime.strptime(val['__datetime__'],
-                                              LDAP_GENERALIZED_TIME_FORMAT)
-        elif '__dns_name__' in val:
-            return DNSName(val['__dns_name__'])
-        else:
-            return dict((k, json_decode_binary(v)) for k, v in val.items())
-    elif isinstance(val, list):
-        return tuple(json_decode_binary(v) for v in val)
-    else:
-        if isinstance(val, bytes):
-            try:
-                return val.decode('utf-8')
-            except UnicodeDecodeError:
-                raise ConversionError(
-                    name=val,
-                    error='incorrect type'
-                )
-        else:
-            return val
+    """
+    if isinstance(val, bytes):
+        val = val.decode('utf-8')
+
+    return json.loads(val, object_hook=_ipa_obj_hook)
 
 
 def decode_fault(e, encoding='UTF-8'):
@@ -1105,27 +1158,27 @@ class JSONServerProxy(object):
         payload = json_encode_binary(payload, version)
 
         if self.__verbose >= 2:
-            root_logger.info('Request: %s',
-                             json.dumps(payload, sort_keys=True, indent=4))
+            root_logger.info(
+                'Request: %s',
+                json.dumps(json.loads(payload), sort_keys=True, indent=4)
+            )
 
         response = self.__transport.request(
             self.__host,
             self.__handler,
-            json.dumps(payload).encode('utf-8'),
+            payload.encode('utf-8'),
             verbose=self.__verbose >= 3,
         )
 
         try:
-            response = json_decode_binary(
-                json.loads(response.decode('utf-8')))
+            response = json_decode_binary(response)
         except ValueError as e:
             raise JSONError(error=str(e))
 
         if self.__verbose >= 2:
             root_logger.info(
                 'Response: %s',
-                json.dumps(json_encode_binary(response, version),
-                           sort_keys=True, indent=4)
+                json.dumps(response, sort_keys=True, indent=4)
             )
 
         error = response.get('error')
         if error:

ipaserver/rpcserver.py

@@ -25,8 +25,8 @@ Also see the `ipalib.rpc` module.
 from xml.sax.saxutils import escape
 import os
-import json
 import traceback
 
 import gssapi
 import requests
@@ -483,13 +483,12 @@ class jsonserver(WSGIExecutioner, HTTP_Status):
             principal=unicode(principal),
             version=unicode(VERSION),
         )
-        response = json_encode_binary(response, version)
-        dump = json.dumps(response, sort_keys=True, indent=4)
+        dump = json_encode_binary(response, version)
         return dump.encode('utf-8')
 
     def unmarshal(self, data):
         try:
-            d = json.loads(data)
+            d = json_decode_binary(data)
         except ValueError as e:
             raise JSONError(error=e)
         if not isinstance(d, dict):
@@ -498,7 +497,6 @@ class jsonserver(WSGIExecutioner, HTTP_Status):
             raise JSONError(error=_('Request is missing "method"'))
         if 'params' not in d:
             raise JSONError(error=_('Request is missing "params"'))
-        d = json_decode_binary(d)
         method = d['method']
         params = d['params']
         _id = d.get('id')
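
For reference, a hedged usage sketch of the round trip through the rewritten
helpers in ipalib/rpc.py (illustrative values; the API version string '2.215'
is made up here and only matters for datetime and DNS name values):

# bytes are wrapped in the {'__base64__': ...} marker on the wire
wire = json_encode_binary({'pw': b'secret'}, u'2.215')
# wire == '{"pw": {"__base64__": "c2VjcmV0"}}'
assert json_decode_binary(wire) == {'pw': b'secret'}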