Add full support and tests for all PG server side encodings. Fixes #3992. Fixes #3982. Fixes #3911

This commit is contained in:
Khushboo Vashi
2019-03-01 13:51:50 +00:00
committed by Dave Page
parent fb747b8031
commit 849e34c2f7
5 changed files with 380 additions and 62 deletions

View File

@@ -400,7 +400,7 @@ class Connection(BaseConnection):
if self.use_binary_placeholder:
register_binary_typecasters(self.conn)
postgres_encoding, self.python_encoding = \
postgres_encoding, self.python_encoding, typecast_encoding = \
getEncoding(self.conn.encoding)
# Note that we use 'UPDATE pg_settings' for setting bytea_output as a
@@ -647,11 +647,7 @@ WHERE
params: Extra parameters
"""
if sys.version_info < (3,):
if type(query) == unicode:
query = query.encode('utf-8')
else:
query = query.encode('utf-8')
query = query.encode(self.python_encoding)
params = self.escape_params_sqlascii(params)
cur.execute(query, params)
@@ -680,16 +676,13 @@ WHERE
return False, str(cur)
query_id = random.randint(1, 9999999)
if IS_PY2 and type(query) == unicode:
query = query.encode('utf-8')
current_app.logger.log(
25,
u"Execute (with server cursor) for server #{server_id} - "
u"{conn_id} (Query-id: {query_id}):\n{query}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8') if
query=query.decode(self.python_encoding) if
sys.version_info < (3,) else query,
query_id=query_id
)
@@ -943,11 +936,9 @@ WHERE
formatted exception message
"""
if sys.version_info < (3,):
if type(query) == unicode:
query = query.encode('utf-8')
else:
query = query.encode('utf-8')
encoding = self.python_encoding
query = query.encode(encoding)
# Convert the params based on python_encoding
params = self.escape_params_sqlascii(params)
@@ -965,7 +956,7 @@ WHERE
u"{query_id}):\n{query}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8'),
query=query.decode(encoding),
query_id=query_id
)
)
@@ -984,7 +975,7 @@ WHERE
u"Error Message:{errmsg}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8'),
query=query.decode(encoding),
errmsg=errmsg,
query_id=query_id
)

View File

@@ -10,20 +10,52 @@
# Get Postgres and Python encoding
encode_dict = {
'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape'],
'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape'],
'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape'],
'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape'],
'LATIN1': ['LATIN1', 'latin1'],
'LATIN2': ['LATIN2', 'latin2'],
'LATIN3': ['LATIN3', 'latin3'],
'LATIN4': ['LATIN4', 'latin4'],
'LATIN5': ['LATIN5', 'latin5'],
'LATIN6': ['LATIN6', 'latin6'],
'LATIN7': ['LATIN7', 'latin7'],
'LATIN8': ['LATIN8', 'latin8'],
'LATIN9': ['LATIN9', 'latin9'],
'LATIN10': ['LATIN10', 'latin10']
'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'],
'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'],
'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape', 'unicode_escape'],
'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape', 'unicode_escape'],
'LATIN1': ['LATIN1', 'latin1', 'latin1'],
'LATIN2': ['LATIN2', 'latin2', 'latin2'],
'LATIN3': ['LATIN3', 'latin3', 'latin3'],
'LATIN4': ['LATIN4', 'latin4', 'latin4'],
'LATIN5': ['LATIN5', 'latin5', 'latin5'],
'LATIN6': ['LATIN6', 'latin6', 'latin6'],
'LATIN7': ['LATIN7', 'latin7', 'latin7'],
'LATIN8': ['LATIN8', 'latin8', 'latin8'],
'LATIN9': ['LATIN9', 'latin9', 'latin9'],
'LATIN10': ['LATIN10', 'latin10', 'latin10'],
'WIN866': ['WIN866', 'cp866', 'cp866'],
'WIN874': ['WIN874', 'cp874', 'cp874'],
'WIN1250': ['WIN1250', 'cp1250', 'cp1250'],
'WIN1251': ['WIN1251', 'cp1251', 'cp1251'],
'WIN1252': ['WIN1252', 'cp1252', 'cp1252'],
'WIN1253': ['WIN1253', 'cp1253', 'cp1253'],
'WIN1254': ['WIN1254', 'cp1254', 'cp1254'],
'WIN1255': ['WIN1255', 'cp1255', 'cp1255'],
'WIN1256': ['WIN1256', 'cp1256', 'cp1256'],
'WIN1257': ['WIN1257', 'cp1257', 'cp1257'],
'WIN1258': ['WIN1258', 'cp1258', 'cp1258'],
'EUC_JIS_2004': ['EUC_JIS_2004', 'eucjis2004', 'eucjis2004'],
'EUCJIS2004': ['EUCJIS2004', 'eucjis2004', 'eucjis2004'],
'EUC_CN': ['EUC_CN', 'euc-cn', 'euc-cn'],
'EUCCN': ['EUCCN', 'euc-cn', 'euc-cn'],
'EUC_JP': ['EUC_JP', 'euc_jp', 'euc_jp'],
'EUCJP': ['EUCJP', 'euc_jp', 'euc_jp'],
'EUC_KR': ['EUC_KR', 'euc_kr', 'euc_kr'],
'EUCKR': ['EUCKR', 'euc_kr', 'euc_kr'],
'EUC_TW': ['BIG5', 'big5', 'big5'],
'EUCTW': ['BIG5', 'big5', 'big5'],
'ISO_8859_5': ['ISO_8859_5', 'iso8859_5', 'iso8859_5'],
'ISO88595': ['ISO88595', 'iso8859_5', 'iso8859_5'],
'ISO_8859_6': ['ISO_8859_6', 'iso8859_6', 'iso8859_6'],
'ISO88596': ['ISO88596', 'iso8859_6', 'iso8859_6'],
'ISO_8859_7': ['ISO_8859_7', 'iso8859_7', 'iso8859_7'],
'ISO88597': ['ISO88597', 'iso8859_7', 'iso8859_7'],
'ISO_8859_8': ['ISO_8859_8', 'iso8859_8', 'iso8859_8'],
'ISO88598': ['ISO88598', 'iso8859_8', 'iso8859_8'],
'KOI8R': ['KOI8R', 'koi8_r', 'koi8_r'],
'KOI8U': ['KOI8U', 'koi8_u', 'koi8_u'],
}
@@ -33,7 +65,7 @@ def getEncoding(key):
:return:
[Postgres_encoding, Python_encoding, typecast_encoding] - Postgres, Python and typecast encoding
"""
return encode_dict.get(key, ['UNICODE', 'utf-8'])
return encode_dict.get(key, ['UNICODE', 'utf-8', 'utf-8'])
def configureDriverEncodings(encodings):
@@ -43,5 +75,5 @@ def configureDriverEncodings(encodings):
# because for parameterized DML, param values are converted based on
# python encoding of psycopg2's internal encodings dict.
for key, val in encode_dict.items():
postgres_encoding, python_encoding = val
postgres_encoding, python_encoding, typecast_encoding = val
encodings[key] = python_encoding

View File

@@ -19,7 +19,7 @@ import psycopg2
from psycopg2.extensions import encodings
from psycopg2.extras import Json as psycopg2_json
from .encoding import configureDriverEncodings
from .encoding import configureDriverEncodings, getEncoding
configureDriverEncodings(encodings)
@@ -182,20 +182,22 @@ def register_string_typecasters(connection):
# characters. Here we unescape them using unicode_escape
# and send ahead. When insert update is done, the characters
# are escaped again and sent to the DB.
if connection.encoding in ('SQL_ASCII', 'SQLASCII',
'MULE_INTERNAL', 'MULEINTERNAL'):
postgres_encoding, python_encoding, typecast_encoding = \
getEncoding(connection.encoding)
if postgres_encoding != 'UNICODE':
if sys.version_info >= (3,):
def non_ascii_escape(value, cursor):
if value is None:
return None
return bytes(
value, encodings[cursor.connection.encoding]
).decode('unicode_escape', errors='replace')
).decode(typecast_encoding, errors='replace')
else:
def non_ascii_escape(value, cursor):
if value is None:
return None
return value.decode('unicode_escape', errors='replace')
return value.decode(typecast_encoding, errors='replace')
# return value
unicode_type = psycopg2.extensions.new_type(