Make the browser more robust in the face of multibyte characters in SQL_ASCII databases. Fixes #3877

2025-02-25 18:55:31 -06:00 · 2019-01-11 21:55:49 +05:30
parent 252e05ef29
commit d641b47adb
5 changed files with 45 additions and 34 deletions
--- a/docs/en_US/release_notes_4_1.rst
+++ b/docs/en_US/release_notes_4_1.rst
@@ -19,3 +19,4 @@ Bug fixes
 | `Bug #3836 <https://redmine.postgresql.org/issues/3836>`_ - Fix ordering of VACUUM options which changed in PG11.
 | `Bug #3842 <https://redmine.postgresql.org/issues/3842>`_ - Don't show system catalogs in the schemas property list unless show system objects is enabled.
 | `Bug #3861 <https://redmine.postgresql.org/issues/3861>`_ - Fix help for the backup/restore dialogues.
+| `Bug #3877 <https://redmine.postgresql.org/issues/3877>`_ - Make the browser more robust in the face of multibyte characters in SQL_ASCII databases.
--- a/web/pgadmin/utils/driver/psycopg2/connection.py
+++ b/web/pgadmin/utils/driver/psycopg2/connection.py
@@ -36,7 +36,7 @@ from .cursor import DictCursor
 from .typecast import register_global_typecasters, \
    register_string_typecasters, register_binary_typecasters, \
    register_array_to_string_typecasters, ALL_JSON_TYPES
-from .encoding import getEncoding
+from .encoding import getEncoding, configureDriverEncodings
 from pgadmin.utils import csv

 if sys.version_info < (3,):
@@ -50,7 +50,7 @@ _ = gettext

 # Register global type caster which will be applicable to all connections.
 register_global_typecasters()
-
+configureDriverEncodings(encodings)

 class Connection(BaseConnection):
    """
@@ -408,14 +408,6 @@ class Connection(BaseConnection):
                               "SET client_encoding='{0}';"
                          .format(postgres_encoding))

-        # Replace the python encoding for original name and renamed encodings
-        # psycopg2 removes the underscore in conn.encoding
-        # Setting the encodings dict value will only help for select statements
-        # because for parameterized DML, param values are converted based on
-        # python encoding of pyscopg2s internal encodings dict.
-        for key, val in encodings.items():
-            if key.replace('_', '') == self.conn.encoding:
-                encodings[key] = self.python_encoding

        if status is not None:
            self.conn.close()
@@ -627,10 +619,10 @@ WHERE
                    # "unicode_escape" will convert single backslash to double
                    # backslash, so we will have to replace/revert them again
                    # to store the correct value into the database.
-                    if isinstance(val, six.string_types):
-                        modified_val = val.encode('unicode_escape')\
-                            .decode('raw_unicode_escape')\
-                            .replace("\\\\", "\\")
+                    # if isinstance(val, six.string_types):
+                    #     modified_val = val.encode('unicode_escape')\
+                    #         .decode('raw_unicode_escape')\
+                    #         .replace("\\\\", "\\")

                    params[key] = modified_val

--- a/web/pgadmin/utils/driver/psycopg2/cursor.py
+++ b/web/pgadmin/utils/driver/psycopg2/cursor.py
@@ -18,7 +18,10 @@ try:
 except ImportError:
    from ordereddict import OrderedDict

-from psycopg2.extensions import cursor as _cursor
+from psycopg2.extensions import cursor as _cursor, encodings
+from .encoding import configureDriverEncodings
+
+configureDriverEncodings(encodings)


 class _WrapperColumn(object):
--- a/web/pgadmin/utils/driver/psycopg2/encoding.py
+++ b/web/pgadmin/utils/driver/psycopg2/encoding.py
@@ -9,6 +9,23 @@

 #  Get Postgres and Python encoding

+encode_dict = {
+    'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape'],
+    'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape'],
+    'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape'],
+    'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape'],
+    'LATIN1': ['LATIN1', 'latin1'],
+    'LATIN2': ['LATIN2', 'latin2'],
+    'LATIN3': ['LATIN3', 'latin3'],
+    'LATIN4': ['LATIN4', 'latin4'],
+    'LATIN5': ['LATIN5', 'latin5'],
+    'LATIN6': ['LATIN6', 'latin6'],
+    'LATIN7': ['LATIN7', 'latin7'],
+    'LATIN8': ['LATIN8', 'latin8'],
+    'LATIN9': ['LATIN9', 'latin9'],
+    'LATIN10': ['LATIN10', 'latin10']
+}
+

 def getEncoding(key):
    """
@@ -16,21 +33,14 @@ def getEncoding(key):
    :return:
    [Postgres_encoding, Python_encoding] - Postgres and Python encoding
    """
-    encode_dict = {
-        'SQL_ASCII': ['SQL_ASCII', 'raw_unicode_escape'],
-        'SQLASCII': ['SQL_ASCII', 'raw_unicode_escape'],
-        'MULE_INTERNAL': ['MULE_INTERNAL', 'raw_unicode_escape'],
-        'MULEINTERNAL': ['MULEINTERNAL', 'raw_unicode_escape'],
-        'LATIN1': ['LATIN1', 'latin1'],
-        'LATIN2': ['LATIN2', 'latin2'],
-        'LATIN3': ['LATIN3', 'latin3'],
-        'LATIN4': ['LATIN4', 'latin4'],
-        'LATIN5': ['LATIN5', 'latin5'],
-        'LATIN6': ['LATIN6', 'latin6'],
-        'LATIN7': ['LATIN7', 'latin7'],
-        'LATIN8': ['LATIN8', 'latin8'],
-        'LATIN9': ['LATIN9', 'latin9'],
-        'LATIN10': ['LATIN10', 'latin10']
-    }
-
    return encode_dict.get(key, ['UNICODE', 'utf-8'])
+
+def configureDriverEncodings(encodings):
+    """Set encodings[name] to the Python codec of every encode_dict entry."""
+    # Replace the python encoding for original name and renamed encodings
+    # psycopg2 removes the underscore in conn.encoding
+    # Setting the encodings dict value will only help for select statements
+    # because for parameterized DML, param values are converted based on
+    # python encoding of psycopg2's internal encodings dict.
+    for key, (_, python_encoding) in encode_dict.items():
+        encodings[key] = python_encoding
--- a/web/pgadmin/utils/driver/psycopg2/typecast.py
+++ b/web/pgadmin/utils/driver/psycopg2/typecast.py
@@ -18,6 +18,10 @@ from psycopg2 import STRING as _STRING
 import psycopg2
 from psycopg2.extensions import encodings

+from .encoding import configureDriverEncodings
+
+configureDriverEncodings(encodings)
+

 # OIDs of data types which need to typecast as string to avoid JavaScript
 # compatibility issues.
@@ -176,12 +180,13 @@ def register_string_typecasters(connection):
                    return None
                return bytes(
                    value, encodings[cursor.connection.encoding]
-                ).decode('raw_unicode_escape')
+                ).decode('unicode_escape', errors='replace')
        else:
            def non_ascii_escape(value, cursor):
                if value is None:
                    return None
-                return value.decode('raw_unicode_escape')
+                return value.decode('unicode_escape', errors='replace')
+                # return value

        unicode_type = psycopg2.extensions.new_type(
            # "char", name, text, character, character varying