Add global "locale" config option and make key-ordering rule locale-aware

Support sorting by locale with strcoll(). Properly handle case and accents.

Note: this is a second implementation, for context see:
https://github.com/adrienverge/yamllint/pull/280
https://github.com/adrienverge/yamllint/issues/285
https://github.com/adrienverge/yamllint/pull/288
This commit is contained in:
Wolfgang Walther 2020-07-16 09:34:13 +02:00 committed by GitHub
parent 0fceca2354
commit b5b436a3a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 149 additions and 11 deletions

View File

@ -189,3 +189,22 @@ Here is a more complex example:
ignore: | ignore: |
*.ignore-trailing-spaces.yaml *.ignore-trailing-spaces.yaml
ascii-art/* ascii-art/*
Setting the locale
------------------
It is possible to set the ``locale`` option globally. This is passed to Python's
`locale.setlocale
<https://docs.python.org/3/library/locale.html#locale.setlocale>`_,
so an empty string ``""`` will use the system default locale, while e.g.
``"en_US.UTF-8"`` will use that.
Currently this only affects the ``key-ordering`` rule. The default will order
by Unicode code point number, while locales will sort case and accents
properly as well.
.. code-block:: yaml
extends: default
locale: en_US.UTF-8

View File

@ -14,6 +14,8 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import locale
from tests.common import RuleTestCase from tests.common import RuleTestCase
@ -103,10 +105,6 @@ class KeyOrderingTestCase(RuleTestCase):
'haïr: true\n' 'haïr: true\n'
'hais: true\n', conf, 'hais: true\n', conf,
problem=(3, 1)) problem=(3, 1))
self.check('---\n'
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))
def test_key_tokens_in_flow_sequences(self): def test_key_tokens_in_flow_sequences(self):
conf = 'key-ordering: enable' conf = 'key-ordering: enable'
@ -114,3 +112,39 @@ class KeyOrderingTestCase(RuleTestCase):
'[\n' '[\n'
' key: value, mappings, in, flow: sequence\n' ' key: value, mappings, in, flow: sequence\n'
']\n', conf) ']\n', conf)
def test_locale_case(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
't-shirt: 1\n'
'T-shirt: 2\n'
't-shirts: 3\n'
'T-shirts: 4\n', conf)
self.check('---\n'
't-shirt: 1\n'
't-shirts: 2\n'
'T-shirt: 3\n'
'T-shirts: 4\n', conf,
problem=(4, 1))
def test_locale_accents(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
'hair: true\n'
'haïr: true\n'
'hais: true\n'
'haïssable: true\n', conf)
self.check('---\n'
'hais: true\n'
'haïr: true\n', conf,
problem=(3, 1))

View File

@ -95,6 +95,13 @@ class CommandLineTestCase(unittest.TestCase):
# dos line endings yaml # dos line endings yaml
'dos.yml': '---\r\n' 'dos.yml': '---\r\n'
'dos: true', 'dos: true',
# different key-ordering by locale
'c.yaml': '---\n'
'A: true\n'
'a: true',
'en.yaml': '---\n'
'a: true\n'
'A: true'
}) })
@classmethod @classmethod
@ -108,8 +115,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual( self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)), sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'), [os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'), os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'), os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/directory.yaml/empty.yml'), os.path.join(self.wd, 'sub/directory.yaml/empty.yml'),
os.path.join(self.wd, 'sub/ok.yaml'), os.path.join(self.wd, 'sub/ok.yaml'),
@ -146,6 +155,8 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual( self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)), sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'), [os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'), os.path.join(self.wd, 'sub/ok.yaml'),
os.path.join(self.wd, 'warn.yaml')] os.path.join(self.wd, 'warn.yaml')]
@ -175,8 +186,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual( self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)), sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'), [os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'), os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'), os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'), os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'), os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
@ -194,8 +207,10 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual( self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)), sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'), [os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'), os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'), os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'), os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'), os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
@ -315,6 +330,46 @@ class CommandLineTestCase(unittest.TestCase):
cli.run((os.path.join(self.wd, 'a.yaml'), )) cli.run((os.path.join(self.wd, 'a.yaml'), ))
self.assertEqual(ctx.returncode, 1) self.assertEqual(ctx.returncode, 1)
def test_run_with_locale(self):
# check for availability of locale, otherwise skip the test
# reset to default before running the test,
# as the first two runs don't use setlocale()
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
locale.setlocale(locale.LC_ALL, (None, None))
# C + en.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 1)
# C + c.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 0)
# the next two runs use setlocale() inside,
# so we need to clean up afterwards
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
# en_US + en.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 0)
# en_US + c.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 1)
def test_run_version(self): def test_run_version(self):
with RunContext(self) as ctx: with RunContext(self) as ctx:
cli.run(('--version', )) cli.run(('--version', ))
@ -375,12 +430,11 @@ class CommandLineTestCase(unittest.TestCase):
# Make sure the default localization conditions on this "system" # Make sure the default localization conditions on this "system"
# support UTF-8 encoding. # support UTF-8 encoding.
loc = locale.getlocale()
try: try:
locale.setlocale(locale.LC_ALL, 'C.UTF-8') locale.setlocale(locale.LC_ALL, (None, 'UTF-8'))
except locale.Error: except locale.Error:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') self.skipTest('no UTF-8 locale available')
self.addCleanup(locale.setlocale, locale.LC_ALL, loc) self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
with RunContext(self) as ctx: with RunContext(self) as ctx:
cli.run(('-f', 'parsable', path)) cli.run(('-f', 'parsable', path))

View File

@ -18,6 +18,7 @@ from __future__ import print_function
import argparse import argparse
import io import io
import locale
import os import os
import platform import platform
import sys import sys
@ -175,6 +176,9 @@ def run(argv=None):
print(e, file=sys.stderr) print(e, file=sys.stderr)
sys.exit(-1) sys.exit(-1)
if conf.locale is not None:
locale.setlocale(locale.LC_ALL, conf.locale)
max_level = 0 max_level = 0
for file in find_files_recursively(args.files, conf): for file in find_files_recursively(args.files, conf):

View File

@ -35,6 +35,8 @@ class YamlLintConfig(object):
self.yaml_files = pathspec.PathSpec.from_lines( self.yaml_files = pathspec.PathSpec.from_lines(
'gitwildmatch', ['*.yaml', '*.yml', '.yamllint']) 'gitwildmatch', ['*.yaml', '*.yml', '.yamllint'])
self.locale = None
if file is not None: if file is not None:
with open(file) as f: with open(file) as f:
content = f.read() content = f.read()
@ -111,6 +113,12 @@ class YamlLintConfig(object):
self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch', self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch',
conf['yaml-files']) conf['yaml-files'])
if 'locale' in conf:
if not isinstance(conf['locale'], str):
raise YamlLintConfigError(
'invalid config: locale should be a string')
self.locale = conf['locale']
def validate(self): def validate(self):
for id in self.rules: for id in self.rules:
try: try:

View File

@ -16,8 +16,10 @@
""" """
Use this rule to enforce alphabetical ordering of keys in mappings. The sorting Use this rule to enforce alphabetical ordering of keys in mappings. The sorting
order uses the Unicode code point number. As a result, the ordering is order uses the Unicode code point number as a default. As a result, the
case-sensitive and not accent-friendly (see examples below). ordering is case-sensitive and not accent-friendly (see examples below).
This can be changed by setting the global ``locale`` option. This allows to
sort case and accents properly.
.. rubric:: Examples .. rubric:: Examples
@ -63,8 +65,24 @@ case-sensitive and not accent-friendly (see examples below).
- haïr: true - haïr: true
hais: true hais: true
#. With global option ``locale: "en_US.UTF-8"`` and rule ``key-ordering: {}``
as opposed to before, the following code snippet would now **PASS**:
::
- t-shirt: 1
T-shirt: 2
t-shirts: 3
T-shirts: 4
- hair: true
haïr: true
hais: true
haïssable: true
""" """
from locale import strcoll
import yaml import yaml
from yamllint.linter import LintProblem from yamllint.linter import LintProblem
@ -101,7 +119,8 @@ def check(conf, token, prev, next, nextnext, context):
# This check is done because KeyTokens can be found inside flow # This check is done because KeyTokens can be found inside flow
# sequences... strange, but allowed. # sequences... strange, but allowed.
if len(context['stack']) > 0 and context['stack'][-1].type == MAP: if len(context['stack']) > 0 and context['stack'][-1].type == MAP:
if any(next.value < key for key in context['stack'][-1].keys): if any(strcoll(next.value, key) < 0
for key in context['stack'][-1].keys):
yield LintProblem( yield LintProblem(
next.start_mark.line + 1, next.start_mark.column + 1, next.start_mark.line + 1, next.start_mark.column + 1,
'wrong ordering of key "%s" in mapping' % next.value) 'wrong ordering of key "%s" in mapping' % next.value)