Add global "locale" config option and make key-ordering rule locale-aware

Support sorting by locale with strcoll(). Properly handle case and accents.

Note: this is a second implementation, for context see:
https://github.com/adrienverge/yamllint/pull/280
https://github.com/adrienverge/yamllint/issues/285
https://github.com/adrienverge/yamllint/pull/288
This commit is contained in:
Wolfgang Walther 2020-07-16 09:34:13 +02:00 committed by GitHub
parent 0fceca2354
commit b5b436a3a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 149 additions and 11 deletions

View File

@ -189,3 +189,22 @@ Here is a more complex example:
ignore: |
*.ignore-trailing-spaces.yaml
ascii-art/*
Setting the locale
------------------
It is possible to set the ``locale`` option globally. This is passed to Python's
`locale.setlocale
<https://docs.python.org/3/library/locale.html#locale.setlocale>`_,
so an empty string ``""`` will use the system default locale, while e.g.
``"en_US.UTF-8"`` will use that.
Currently this only affects the ``key-ordering`` rule. The default will order
by Unicode code point number, while locales will sort case and accents
properly as well.
.. code-block:: yaml
extends: default
locale: en_US.UTF-8

View File

@ -14,6 +14,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import locale
from tests.common import RuleTestCase
@ -103,10 +105,6 @@ class KeyOrderingTestCase(RuleTestCase):
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))
self.check('---\n'
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))
def test_key_tokens_in_flow_sequences(self):
conf = 'key-ordering: enable'
@ -114,3 +112,39 @@ class KeyOrderingTestCase(RuleTestCase):
'[\n'
' key: value, mappings, in, flow: sequence\n'
']\n', conf)
def test_locale_case(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
't-shirt: 1\n'
'T-shirt: 2\n'
't-shirts: 3\n'
'T-shirts: 4\n', conf)
self.check('---\n'
't-shirt: 1\n'
't-shirts: 2\n'
'T-shirt: 3\n'
'T-shirts: 4\n', conf,
problem=(4, 1))
def test_locale_accents(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
'hair: true\n'
'haïr: true\n'
'hais: true\n'
'haïssable: true\n', conf)
self.check('---\n'
'hais: true\n'
'haïr: true\n', conf,
problem=(3, 1))

View File

@ -95,6 +95,13 @@ class CommandLineTestCase(unittest.TestCase):
# dos line endings yaml
'dos.yml': '---\r\n'
'dos: true',
# different key-ordering by locale
'c.yaml': '---\n'
'A: true\n'
'a: true',
'en.yaml': '---\n'
'a: true\n'
'A: true'
})
@classmethod
@ -108,8 +115,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/directory.yaml/empty.yml'),
os.path.join(self.wd, 'sub/ok.yaml'),
@ -146,6 +155,8 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'),
os.path.join(self.wd, 'warn.yaml')]
@ -175,8 +186,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
@ -194,8 +207,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
@ -315,6 +330,46 @@ class CommandLineTestCase(unittest.TestCase):
cli.run((os.path.join(self.wd, 'a.yaml'), ))
self.assertEqual(ctx.returncode, 1)
def test_run_with_locale(self):
# check for availability of locale, otherwise skip the test
# reset to default before running the test,
# as the first two runs don't use setlocale()
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
locale.setlocale(locale.LC_ALL, (None, None))
# C + en.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 1)
# C + c.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 0)
# the next two runs use setlocale() inside,
# so we need to clean up afterwards
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
# en_US + en.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 0)
# en_US + c.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 1)
def test_run_version(self):
with RunContext(self) as ctx:
cli.run(('--version', ))
@ -375,12 +430,11 @@ class CommandLineTestCase(unittest.TestCase):
# Make sure the default localization conditions on this "system"
# support UTF-8 encoding.
loc = locale.getlocale()
try:
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
locale.setlocale(locale.LC_ALL, (None, 'UTF-8'))
except locale.Error:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
self.addCleanup(locale.setlocale, locale.LC_ALL, loc)
self.skipTest('no UTF-8 locale available')
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
with RunContext(self) as ctx:
cli.run(('-f', 'parsable', path))

View File

@ -18,6 +18,7 @@ from __future__ import print_function
import argparse
import io
import locale
import os
import platform
import sys
@ -175,6 +176,9 @@ def run(argv=None):
print(e, file=sys.stderr)
sys.exit(-1)
if conf.locale is not None:
locale.setlocale(locale.LC_ALL, conf.locale)
max_level = 0
for file in find_files_recursively(args.files, conf):

View File

@ -35,6 +35,8 @@ class YamlLintConfig(object):
self.yaml_files = pathspec.PathSpec.from_lines(
'gitwildmatch', ['*.yaml', '*.yml', '.yamllint'])
self.locale = None
if file is not None:
with open(file) as f:
content = f.read()
@ -111,6 +113,12 @@ class YamlLintConfig(object):
self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch',
conf['yaml-files'])
if 'locale' in conf:
if not isinstance(conf['locale'], str):
raise YamlLintConfigError(
'invalid config: locale should be a string')
self.locale = conf['locale']
def validate(self):
for id in self.rules:
try:

View File

@ -16,8 +16,10 @@
"""
Use this rule to enforce alphabetical ordering of keys in mappings. The sorting
order uses the Unicode code point number. As a result, the ordering is
case-sensitive and not accent-friendly (see examples below).
order uses the Unicode code point number as a default. As a result, the
ordering is case-sensitive and not accent-friendly (see examples below).
This can be changed by setting the global ``locale`` option. This allows to
sort case and accents properly.
.. rubric:: Examples
@ -63,8 +65,24 @@ case-sensitive and not accent-friendly (see examples below).
- haïr: true
hais: true
#. With global option ``locale: "en_US.UTF-8"`` and rule ``key-ordering: {}``
as opposed to before, the following code snippet would now **PASS**:
::
- t-shirt: 1
T-shirt: 2
t-shirts: 3
T-shirts: 4
- hair: true
haïr: true
hais: true
haïssable: true
"""
from locale import strcoll
import yaml
from yamllint.linter import LintProblem
@ -101,7 +119,8 @@ def check(conf, token, prev, next, nextnext, context):
# This check is done because KeyTokens can be found inside flow
# sequences... strange, but allowed.
if len(context['stack']) > 0 and context['stack'][-1].type == MAP:
if any(next.value < key for key in context['stack'][-1].keys):
if any(strcoll(next.value, key) < 0
for key in context['stack'][-1].keys):
yield LintProblem(
next.start_mark.line + 1, next.start_mark.column + 1,
'wrong ordering of key "%s" in mapping' % next.value)