Add full support and tests for all PG server side encodings. Fixes #3992. Fixes #3982. Fixes #3911

This commit is contained in:
Khushboo Vashi 2019-03-01 13:51:50 +00:00 committed by Dave Page
parent fb747b8031
commit 849e34c2f7
5 changed files with 380 additions and 62 deletions

View File

@ -14,7 +14,7 @@ from pgadmin.browser.server_groups.servers.databases.tests import utils as \
from regression import parent_node_dict
from regression.python_test_utils import test_utils
import json
from pgadmin.utils import server_utils
from pgadmin.utils import server_utils, IS_PY2
class TestEncodingCharset(BaseTestGenerator):
@ -31,13 +31,6 @@ class TestEncodingCharset(BaseTestGenerator):
lc_collate='C',
test_str='A'
)),
(
'With Encoding WIN1252',
dict(
db_encoding='WIN1252',
lc_collate='C',
test_str='A'
)),
(
'With Encoding EUC_CN',
dict(
@ -50,14 +43,14 @@ class TestEncodingCharset(BaseTestGenerator):
dict(
db_encoding='SQL_ASCII',
lc_collate='C',
test_str='\\255'
test_str='Tif'
)),
(
'With Encoding LATIN1',
dict(
db_encoding='LATIN1',
lc_collate='C',
test_str='Ň'
test_str='äöüßÑ'
)),
(
'With Encoding LATIN2',
@ -66,7 +59,174 @@ class TestEncodingCharset(BaseTestGenerator):
lc_collate='C',
test_str='§'
)),
(
'With Encoding LATIN9',
dict(
db_encoding='LATIN9',
lc_collate='C',
test_str='äöüß'
)),
(
'With Encoding EUC_JIS_2004',
dict(
db_encoding='EUC_JIS_2004',
lc_collate='C',
test_str='じんぼはりんごをたべる'
)),
(
'With Encoding WIN1256',
dict(
db_encoding='WIN1256',
lc_collate='C',
test_str='صباح الخير'
)),
(
'With Encoding WIN866',
dict(
db_encoding='WIN866',
lc_collate='C',
test_str='Альтернативная'
)),
(
'With Encoding WIN874',
dict(
db_encoding='WIN874',
lc_collate='C',
test_str='กลิ่นหอม'
)),
(
'With Encoding WIN1250',
dict(
db_encoding='WIN1250',
lc_collate='C',
test_str='ŔÁÄÇ'
)),
(
'With Encoding WIN1251',
dict(
db_encoding='WIN1251',
lc_collate='C',
test_str='ЖИЙЮ'
)),
(
'With Encoding WIN1252',
dict(
db_encoding='WIN1252',
lc_collate='C',
test_str='ÆØÙü'
)),
(
'With Encoding WIN1253',
dict(
db_encoding='WIN1253',
lc_collate='C',
test_str='ΨΪμΫ'
)),
(
'With Encoding WIN1254',
dict(
db_encoding='WIN1254',
lc_collate='C',
test_str='ĞğØŠ'
)),
(
'With Encoding WIN1255',
dict(
db_encoding='WIN1255',
lc_collate='C',
test_str='₪¥©¾'
)),
(
'With Encoding WIN1256',
dict(
db_encoding='WIN1256',
lc_collate='C',
test_str='بؤغق'
)),
(
'With Encoding WIN1257',
dict(
db_encoding='WIN1257',
lc_collate='C',
test_str='‰ķģž'
)),
(
'With Encoding WIN1258',
dict(
db_encoding='WIN1258',
lc_collate='C',
test_str='₫SHYÑđ'
)),
(
'With Encoding EUC_CN',
dict(
db_encoding='EUC_CN',
lc_collate='C',
test_str='汉字不灭'
)),
(
'With Encoding EUC_JP',
dict(
db_encoding='EUC_JP',
lc_collate='C',
test_str='での日本'
)),
(
'With Encoding EUC_KR',
dict(
db_encoding='EUC_KR',
lc_collate='C',
test_str='ㄱㄲㄴㄷ'
)),
(
'With Encoding EUC_TW',
dict(
db_encoding='EUC_TW',
lc_collate='C',
test_str='中文'
)),
(
'With Encoding ISO_8859_5',
dict(
db_encoding='ISO_8859_5',
lc_collate='C',
test_str='ЁЎФЮ'
)),
(
'With Encoding ISO_8859_6',
dict(
db_encoding='ISO_8859_6',
lc_collate='C',
test_str='العَرَبِيَّة'
)),
(
'With Encoding ISO_8859_7',
dict(
db_encoding='ISO_8859_7',
lc_collate='C',
test_str='ελληνικά'
)),
(
'With Encoding ISO_8859_8',
dict(
db_encoding='ISO_8859_8',
lc_collate='C',
test_str='דבא'
)),
(
'With Encoding KOI8R',
dict(
db_encoding='KOI8R',
lc_collate='C',
test_str='Альтернативная'
)),
(
'With Encoding KOI8U',
dict(
db_encoding='KOI8U',
lc_collate='C',
test_str='українська'
)),
]
def setUp(self):
@ -113,6 +273,11 @@ class TestEncodingCharset(BaseTestGenerator):
self.assertEquals(response.status_code, 200)
response_data = json.loads(response.data.decode('utf-8'))
self.assertEquals(response_data['data']['rows_fetched_to'], 1)
if IS_PY2 and type(response_data['data']['result'][0][0]) == unicode:
result = response_data['data']['result'][0][0].encode('utf-8')
else:
result = response_data['data']['result'][0][0]
self.assertEquals(result, self.test_str)
database_utils.disconnect_database(self, self.encode_sid,
self.encode_did)

View File

@ -400,7 +400,7 @@ class Connection(BaseConnection):
if self.use_binary_placeholder:
register_binary_typecasters(self.conn)
postgres_encoding, self.python_encoding = \
postgres_encoding, self.python_encoding, typecast_encoding = \
getEncoding(self.conn.encoding)
# Note that we use 'UPDATE pg_settings' for setting bytea_output as a
@ -647,11 +647,7 @@ WHERE
params: Extra parameters
"""
if sys.version_info < (3,):
if type(query) == unicode:
query = query.encode('utf-8')
else:
query = query.encode('utf-8')
query = query.encode(self.python_encoding)
params = self.escape_params_sqlascii(params)
cur.execute(query, params)
@ -680,16 +676,13 @@ WHERE
return False, str(cur)
query_id = random.randint(1, 9999999)
if IS_PY2 and type(query) == unicode:
query = query.encode('utf-8')
current_app.logger.log(
25,
u"Execute (with server cursor) for server #{server_id} - "
u"{conn_id} (Query-id: {query_id}):\n{query}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8') if
query=query.decode(self.python_encoding) if
sys.version_info < (3,) else query,
query_id=query_id
)
@ -943,11 +936,9 @@ WHERE
formatted exception message
"""
if sys.version_info < (3,):
if type(query) == unicode:
query = query.encode('utf-8')
else:
query = query.encode('utf-8')
encoding = self.python_encoding
query = query.encode(encoding)
# Convert the params based on python_encoding
params = self.escape_params_sqlascii(params)
@ -965,7 +956,7 @@ WHERE
u"{query_id}):\n{query}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8'),
query=query.decode(encoding),
query_id=query_id
)
)
@ -984,7 +975,7 @@ WHERE
u"Error Message:{errmsg}".format(
server_id=self.manager.sid,
conn_id=self.conn_id,
query=query.decode('utf-8'),
query=query.decode(encoding),
errmsg=errmsg,
query_id=query_id
)

View File

@ -10,20 +10,52 @@
# Get Postgres and Python encoding
encode_dict = {
'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape'],
'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape'],
'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape'],
'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape'],
'LATIN1': ['LATIN1', 'latin1'],
'LATIN2': ['LATIN2', 'latin2'],
'LATIN3': ['LATIN3', 'latin3'],
'LATIN4': ['LATIN4', 'latin4'],
'LATIN5': ['LATIN5', 'latin5'],
'LATIN6': ['LATIN6', 'latin6'],
'LATIN7': ['LATIN7', 'latin7'],
'LATIN8': ['LATIN8', 'latin8'],
'LATIN9': ['LATIN9', 'latin9'],
'LATIN10': ['LATIN10', 'latin10']
'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'],
'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape', 'unicode_escape'],
'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape', 'unicode_escape'],
'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape', 'unicode_escape'],
'LATIN1': ['LATIN1', 'latin1', 'latin1'],
'LATIN2': ['LATIN2', 'latin2', 'latin2'],
'LATIN3': ['LATIN3', 'latin3', 'latin3'],
'LATIN4': ['LATIN4', 'latin4', 'latin4'],
'LATIN5': ['LATIN5', 'latin5', 'latin5'],
'LATIN6': ['LATIN6', 'latin6', 'latin6'],
'LATIN7': ['LATIN7', 'latin7', 'latin7'],
'LATIN8': ['LATIN8', 'latin8', 'latin8'],
'LATIN9': ['LATIN9', 'latin9', 'latin9'],
'LATIN10': ['LATIN10', 'latin10', 'latin10'],
'WIN866': ['WIN866', 'cp866', 'cp866'],
'WIN874': ['WIN874', 'cp874', 'cp874'],
'WIN1250': ['WIN1250', 'cp1250', 'cp1250'],
'WIN1251': ['WIN1251', 'cp1251', 'cp1251'],
'WIN1252': ['WIN1252', 'cp1252', 'cp1252'],
'WIN1253': ['WIN1253', 'cp1253', 'cp1253'],
'WIN1254': ['WIN1254', 'cp1254', 'cp1254'],
'WIN1255': ['WIN1255', 'cp1255', 'cp1255'],
'WIN1256': ['WIN1256', 'cp1256', 'cp1256'],
'WIN1257': ['WIN1257', 'cp1257', 'cp1257'],
'WIN1258': ['WIN1258', 'cp1258', 'cp1258'],
'EUC_JIS_2004': ['EUC_JIS_2004', 'eucjis2004', 'eucjis2004'],
'EUCJIS2004': ['EUCJIS2004', 'eucjis2004', 'eucjis2004'],
'EUC_CN': ['EUC_CN', 'euc-cn', 'euc-cn'],
'EUCCN': ['EUCCN', 'euc-cn', 'euc-cn'],
'EUC_JP': ['EUC_JP', 'euc_jp', 'euc_jp'],
'EUCJP': ['EUCJP', 'euc_jp', 'euc_jp'],
'EUC_KR': ['EUC_KR', 'euc_kr', 'euc_kr'],
'EUCKR': ['EUCKR', 'euc_kr', 'euc_kr'],
'EUC_TW': ['BIG5', 'big5', 'big5'],
'EUCTW': ['BIG5', 'big5', 'big5'],
'ISO_8859_5': ['ISO_8859_5', 'iso8859_5', 'iso8859_5'],
'ISO88595': ['ISO88595', 'iso8859_5', 'iso8859_5'],
'ISO_8859_6': ['ISO_8859_6', 'iso8859_6', 'iso8859_6'],
'ISO88596': ['ISO88596', 'iso8859_6', 'iso8859_6'],
'ISO_8859_7': ['ISO_8859_7', 'iso8859_7', 'iso8859_7'],
'ISO88597': ['ISO88597', 'iso8859_7', 'iso8859_7'],
'ISO_8859_8': ['ISO_8859_8', 'iso8859_8', 'iso8859_8'],
'ISO88598': ['ISO88598', 'iso8859_8', 'iso8859_8'],
'KOI8R': ['KOI8R', 'koi8_r', 'koi8_r'],
'KOI8U': ['KOI8U', 'koi8_u', 'koi8_u'],
}
@ -33,7 +65,7 @@ def getEncoding(key):
:return:
[Postgres_encoding, Python_encoding, typecast_encoding] - Postgres, Python and typecast encodings
"""
return encode_dict.get(key, ['UNICODE', 'utf-8'])
return encode_dict.get(key, ['UNICODE', 'utf-8', 'utf-8'])
def configureDriverEncodings(encodings):
@ -43,5 +75,5 @@ def configureDriverEncodings(encodings):
# because for parameterized DML, param values are converted based on
# python encoding of psycopg2's internal encodings dict.
for key, val in encode_dict.items():
postgres_encoding, python_encoding = val
postgres_encoding, python_encoding, typecast_encoding = val
encodings[key] = python_encoding

View File

@ -19,7 +19,7 @@ import psycopg2
from psycopg2.extensions import encodings
from psycopg2.extras import Json as psycopg2_json
from .encoding import configureDriverEncodings
from .encoding import configureDriverEncodings, getEncoding
configureDriverEncodings(encodings)
@ -182,20 +182,22 @@ def register_string_typecasters(connection):
# characters. Here we unescape them using unicode_escape
# and send ahead. When insert update is done, the characters
# are escaped again and sent to the DB.
if connection.encoding in ('SQL_ASCII', 'SQLASCII',
'MULE_INTERNAL', 'MULEINTERNAL'):
postgres_encoding, python_encoding, typecast_encoding = \
getEncoding(connection.encoding)
if postgres_encoding != 'UNICODE':
if sys.version_info >= (3,):
def non_ascii_escape(value, cursor):
if value is None:
return None
return bytes(
value, encodings[cursor.connection.encoding]
).decode('unicode_escape', errors='replace')
).decode(typecast_encoding, errors='replace')
else:
def non_ascii_escape(value, cursor):
if value is None:
return None
return value.decode('unicode_escape', errors='replace')
return value.decode(typecast_encoding, errors='replace')
# return value
unicode_type = psycopg2.extensions.new_type(

View File

@ -16,79 +16,207 @@ class TestEncoding(BaseTestGenerator):
'When the database encoding is SQL_ASCII',
dict(
db_encoding='SQL_ASCII',
expected_return_value=['SQL_ASCII', 'raw_unicode_escape']
expected_return_value=['SQL_ASCII', 'raw_unicode_escape',
'unicode_escape']
)
), (
'When the database encoding is MULEINTERNAL',
dict(
db_encoding='MULEINTERNAL',
expected_return_value=['MULEINTERNAL', 'raw_unicode_escape']
expected_return_value=['MULEINTERNAL', 'raw_unicode_escape',
'unicode_escape']
)
), (
'When the database encoding is LATIN1',
dict(
db_encoding='LATIN1',
expected_return_value=['LATIN1', 'latin1']
expected_return_value=['LATIN1', 'latin1', 'latin1']
)
), (
'When the database encoding is LATIN2',
dict(
db_encoding='LATIN2',
expected_return_value=['LATIN2', 'latin2']
expected_return_value=['LATIN2', 'latin2', 'latin2']
)
), (
'When the database encoding is LATIN3',
dict(
db_encoding='LATIN3',
expected_return_value=['LATIN3', 'latin3']
expected_return_value=['LATIN3', 'latin3', 'latin3']
)
), (
'When the database encoding is LATIN4',
dict(
db_encoding='LATIN4',
expected_return_value=['LATIN4', 'latin4']
expected_return_value=['LATIN4', 'latin4', 'latin4']
)
), (
'When the database encoding is LATIN5',
dict(
db_encoding='LATIN5',
expected_return_value=['LATIN5', 'latin5']
expected_return_value=['LATIN5', 'latin5', 'latin5']
)
), (
'When the database encoding is LATIN6',
dict(
db_encoding='LATIN6',
expected_return_value=['LATIN6', 'latin6']
expected_return_value=['LATIN6', 'latin6', 'latin6']
)
), (
'When the database encoding is LATIN7',
dict(
db_encoding='LATIN7',
expected_return_value=['LATIN7', 'latin7']
expected_return_value=['LATIN7', 'latin7', 'latin7']
)
), (
'When the database encoding is LATIN8',
dict(
db_encoding='LATIN8',
expected_return_value=['LATIN8', 'latin8']
expected_return_value=['LATIN8', 'latin8', 'latin8']
)
), (
'When the database encoding is LATIN9',
dict(
db_encoding='LATIN9',
expected_return_value=['LATIN9', 'latin9']
expected_return_value=['LATIN9', 'latin9', 'latin9']
)
), (
'When the database encoding is LATIN10',
dict(
db_encoding='LATIN10',
expected_return_value=['LATIN10', 'latin10']
expected_return_value=['LATIN10', 'latin10', 'latin10']
)
), (
'When the database encoding is WIN1250',
dict(
db_encoding='WIN1250',
expected_return_value=['WIN1250', 'cp1250', 'cp1250']
)
), (
'When the database encoding is WIN1251',
dict(
db_encoding='WIN1251',
expected_return_value=['WIN1251', 'cp1251', 'cp1251']
)
), (
'When the database encoding is WIN1252',
dict(
db_encoding='WIN1252',
expected_return_value=['WIN1252', 'cp1252', 'cp1252']
)
), (
'When the database encoding is WIN1253',
dict(
db_encoding='WIN1253',
expected_return_value=['WIN1253', 'cp1253', 'cp1253']
)
), (
'When the database encoding is WIN1254',
dict(
db_encoding='WIN1254',
expected_return_value=['WIN1254', 'cp1254', 'cp1254']
)
), (
'When the database encoding is WIN1255',
dict(
db_encoding='WIN1255',
expected_return_value=['WIN1255', 'cp1255', 'cp1255']
)
), (
'When the database encoding is WIN1256',
dict(
db_encoding='WIN1256',
expected_return_value=['WIN1256', 'cp1256', 'cp1256']
)
), (
'When the database encoding is WIN1257',
dict(
db_encoding='WIN1257',
expected_return_value=['WIN1257', 'cp1257', 'cp1257']
)
), (
'When the database encoding is WIN1258',
dict(
db_encoding='WIN1258',
expected_return_value=['UNICODE', 'utf-8']
expected_return_value=['WIN1258', 'cp1258', 'cp1258']
)
), (
'When the database encoding is EUC_JIS_2004',
dict(
db_encoding='EUC_JIS_2004',
expected_return_value=['EUC_JIS_2004', 'eucjis2004', 'eucjis2004']
)
), (
'When the database encoding is EUC_CN',
dict(
db_encoding='EUC_CN',
expected_return_value=['EUC_CN', 'euc-cn', 'euc-cn']
)
), (
'When the database encoding is EUC_JP',
dict(
db_encoding='EUC_JP',
expected_return_value=['EUC_JP', 'euc_jp', 'euc_jp']
)
), (
'When the database encoding is EUC_KR',
dict(
db_encoding='EUC_KR',
expected_return_value=['EUC_KR', 'euc_kr', 'euc_kr']
)
), (
'When the database encoding is EUC_TW',
dict(
db_encoding='EUC_TW',
expected_return_value=['BIG5', 'big5', 'big5']
)
), (
'When the database encoding is ISO_8859_5',
dict(
db_encoding='ISO_8859_5',
expected_return_value=['ISO_8859_5', 'iso8859_5', 'iso8859_5']
)
), (
'When the database encoding is ISO_8859_6',
dict(
db_encoding='ISO_8859_6',
expected_return_value=['ISO_8859_6', 'iso8859_6', 'iso8859_6']
)
), (
'When the database encoding is ISO_8859_7',
dict(
db_encoding='ISO_8859_7',
expected_return_value=['ISO_8859_7', 'iso8859_7', 'iso8859_7']
)
), (
'When the database encoding is ISO_8859_8',
dict(
db_encoding='ISO_8859_8',
expected_return_value=['ISO_8859_8', 'iso8859_8', 'iso8859_8']
)
), (
'When the database encoding is KOI8R',
dict(
db_encoding='KOI8R',
expected_return_value=['KOI8R', 'koi8_r', 'koi8_r']
)
), (
'When the database encoding is KOI8U',
dict(
db_encoding='KOI8U',
expected_return_value=['KOI8U', 'koi8_u', 'koi8_u']
)
), (
'When the database encoding is WIN866',
dict(
db_encoding='WIN866',
expected_return_value=['WIN866', 'cp866', 'cp866']
)
), (
'When the database encoding is WIN874',
dict(
db_encoding='WIN874',
expected_return_value=['WIN874', 'cp874', 'cp874']
)
),
]