diff --git a/docs/configuration.rst b/docs/configuration.rst index d0e920b..a93f21c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -189,3 +189,22 @@ Here is a more complex example: ignore: | *.ignore-trailing-spaces.yaml ascii-art/* + +Setting the locale +------------------ + +It is possible to set the ``locale`` option globally. This is passed to Python's +`locale.setlocale +<https://docs.python.org/3/library/locale.html#locale.setlocale>`_, +so an empty string ``""`` will use the system default locale, while e.g. +``"en_US.UTF-8"`` will use that. If unset, the default is ``"C.UTF-8"``. + +Currently this only affects the ``key-ordering`` rule. The default will order +by Unicode code point number, while other locales will sort case and accents +properly as well. + +.. code-block:: yaml + + extends: default + + locale: en_US.UTF-8 diff --git a/tests/rules/test_key_ordering.py b/tests/rules/test_key_ordering.py index dc486af..54bab4a 100644 --- a/tests/rules/test_key_ordering.py +++ b/tests/rules/test_key_ordering.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+import locale + from tests.common import RuleTestCase @@ -103,10 +105,6 @@ class KeyOrderingTestCase(RuleTestCase): 'haïr: true\n' 'hais: true\n', conf, problem=(3, 1)) - self.check('---\n' - 'haïr: true\n' - 'hais: true\n', conf, - problem=(3, 1)) def test_key_tokens_in_flow_sequences(self): conf = 'key-ordering: enable' @@ -114,3 +112,39 @@ class KeyOrderingTestCase(RuleTestCase): '[\n' ' key: value, mappings, in, flow: sequence\n' ']\n', conf) + + def test_locale_case(self): + self.addCleanup(locale.setlocale, locale.LC_ALL, 'C.UTF-8') + try: + locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + except locale.Error: + self.skipTest('locale en_US.UTF-8 not available') + conf = ('key-ordering: enable') + self.check('---\n' + 't-shirt: 1\n' + 'T-shirt: 2\n' + 't-shirts: 3\n' + 'T-shirts: 4\n', conf) + self.check('---\n' + 't-shirt: 1\n' + 't-shirts: 2\n' + 'T-shirt: 3\n' + 'T-shirts: 4\n', conf, + problem=(4, 1)) + + def test_locale_accents(self): + self.addCleanup(locale.setlocale, locale.LC_ALL, 'C.UTF-8') + try: + locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + except locale.Error: + self.skipTest('locale en_US.UTF-8 not available') + conf = ('key-ordering: enable') + self.check('---\n' + 'hair: true\n' + 'haïr: true\n' + 'hais: true\n' + 'haïssable: true\n', conf) + self.check('---\n' + 'hais: true\n' + 'haïr: true\n', conf, + problem=(3, 1)) diff --git a/tests/test_cli.py b/tests/test_cli.py index 450507b..c4478d2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -95,6 +95,13 @@ class CommandLineTestCase(unittest.TestCase): # dos line endings yaml 'dos.yml': '---\r\n' 'dos: true', + # different key-ordering by locale + 'c.yaml': '---\n' + 'A: true\n' + 'a: true', + 'en.yaml': '---\n' + 'a: true\n' + 'A: true' }) @classmethod @@ -108,8 +115,10 @@ class CommandLineTestCase(unittest.TestCase): self.assertEqual( sorted(cli.find_files_recursively([self.wd], conf)), [os.path.join(self.wd, 'a.yaml'), + os.path.join(self.wd, 'c.yaml'), os.path.join(self.wd, 
'dos.yml'), os.path.join(self.wd, 'empty.yml'), + os.path.join(self.wd, 'en.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 'sub/directory.yaml/empty.yml'), os.path.join(self.wd, 'sub/ok.yaml'), @@ -146,6 +155,8 @@ class CommandLineTestCase(unittest.TestCase): self.assertEqual( sorted(cli.find_files_recursively([self.wd], conf)), [os.path.join(self.wd, 'a.yaml'), + os.path.join(self.wd, 'c.yaml'), + os.path.join(self.wd, 'en.yaml'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), os.path.join(self.wd, 'sub/ok.yaml'), os.path.join(self.wd, 'warn.yaml')] @@ -175,8 +186,10 @@ class CommandLineTestCase(unittest.TestCase): self.assertEqual( sorted(cli.find_files_recursively([self.wd], conf)), [os.path.join(self.wd, 'a.yaml'), + os.path.join(self.wd, 'c.yaml'), os.path.join(self.wd, 'dos.yml'), os.path.join(self.wd, 'empty.yml'), + os.path.join(self.wd, 'en.yaml'), os.path.join(self.wd, 'no-yaml.json'), os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), @@ -194,8 +207,10 @@ class CommandLineTestCase(unittest.TestCase): self.assertEqual( sorted(cli.find_files_recursively([self.wd], conf)), [os.path.join(self.wd, 'a.yaml'), + os.path.join(self.wd, 'c.yaml'), os.path.join(self.wd, 'dos.yml'), os.path.join(self.wd, 'empty.yml'), + os.path.join(self.wd, 'en.yaml'), os.path.join(self.wd, 'no-yaml.json'), os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'), os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'), @@ -315,6 +330,39 @@ class CommandLineTestCase(unittest.TestCase): cli.run((os.path.join(self.wd, 'a.yaml'), )) self.assertEqual(ctx.returncode, 1) + def test_run_with_locale(self): + self.addCleanup(locale.setlocale, locale.LC_ALL, 'C.UTF-8') + try: + locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + except locale.Error: + self.skipTest('locale en_US.UTF-8 not available') + + # C + en.yaml should fail + with RunContext(self) as ctx: 
+ cli.run(('-d', 'rules: { key-ordering: enable }', + os.path.join(self.wd, 'en.yaml'))) + self.assertEqual(ctx.returncode, 1) + + # en_US + en.yaml should pass + with RunContext(self) as ctx: + cli.run(('-d', 'locale: en_US.UTF-8\n' + 'rules: { key-ordering: enable }', + os.path.join(self.wd, 'en.yaml'))) + self.assertEqual(ctx.returncode, 0) + + # en_US + c.yaml should fail + with RunContext(self) as ctx: + cli.run(('-d', 'locale: en_US.UTF-8\n' + 'rules: { key-ordering: enable }', + os.path.join(self.wd, 'c.yaml'))) + self.assertEqual(ctx.returncode, 1) + + # C + c.yaml should pass + with RunContext(self) as ctx: + cli.run(('-d', 'rules: { key-ordering: enable }', + os.path.join(self.wd, 'c.yaml'))) + self.assertEqual(ctx.returncode, 0) + def test_run_version(self): with RunContext(self) as ctx: cli.run(('--version', )) @@ -373,15 +421,6 @@ class CommandLineTestCase(unittest.TestCase): def test_run_non_ascii_file(self): path = os.path.join(self.wd, 'non-ascii', 'éçäγλνπ¥', 'utf-8') - # Make sure the default localization conditions on this "system" - # support UTF-8 encoding. 
- loc = locale.getlocale() - try: - locale.setlocale(locale.LC_ALL, 'C.UTF-8') - except locale.Error: - locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') - self.addCleanup(locale.setlocale, locale.LC_ALL, loc) - with RunContext(self) as ctx: cli.run(('-f', 'parsable', path)) self.assertEqual((ctx.returncode, ctx.stdout, ctx.stderr), (0, '', '')) diff --git a/yamllint/cli.py b/yamllint/cli.py index 2566c89..68f8a41 100644 --- a/yamllint/cli.py +++ b/yamllint/cli.py @@ -18,6 +18,7 @@ from __future__ import print_function import argparse import io +import locale import os import platform import sys @@ -175,6 +176,8 @@ def run(argv=None): print(e, file=sys.stderr) sys.exit(-1) + locale.setlocale(locale.LC_ALL, conf.locale) + max_level = 0 for file in find_files_recursively(args.files, conf): diff --git a/yamllint/config.py b/yamllint/config.py index 0abb242..9092a65 100644 --- a/yamllint/config.py +++ b/yamllint/config.py @@ -35,6 +35,8 @@ class YamlLintConfig(object): self.yaml_files = pathspec.PathSpec.from_lines( 'gitwildmatch', ['*.yaml', '*.yml', '.yamllint']) + self.locale = 'C.UTF-8' + if file is not None: with open(file) as f: content = f.read() @@ -111,6 +113,12 @@ class YamlLintConfig(object): self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch', conf['yaml-files']) + if 'locale' in conf: + if not isinstance(conf['locale'], str): + raise YamlLintConfigError( + 'invalid config: locale should be a string') + self.locale = conf['locale'] + def validate(self): for id in self.rules: try: diff --git a/yamllint/rules/key_ordering.py b/yamllint/rules/key_ordering.py index 1ca992b..eca38ab 100644 --- a/yamllint/rules/key_ordering.py +++ b/yamllint/rules/key_ordering.py @@ -16,8 +16,10 @@ """ Use this rule to enforce alphabetical ordering of keys in mappings. The sorting -order uses the Unicode code point number. As a result, the ordering is -case-sensitive and not accent-friendly (see examples below). +order uses the Unicode code point number as a default. 
As a result, the +ordering is case-sensitive and not accent-friendly (see examples below). +This can be changed by setting the global ``locale`` option. This allows case and +accents to be sorted properly. .. rubric:: Examples @@ -63,8 +65,24 @@ case-sensitive and not accent-friendly (see examples below). - haïr: true hais: true + +#. With global option ``locale: "en_US.UTF-8"`` and rule ``key-ordering: {}`` + + as opposed to before, the following code snippet would now **PASS**: + :: + + - t-shirt: 1 + T-shirt: 2 + t-shirts: 3 + T-shirts: 4 + - hair: true + haïr: true + hais: true + haïssable: true """ +from locale import strcoll + import yaml from yamllint.linter import LintProblem @@ -101,7 +119,8 @@ def check(conf, token, prev, next, nextnext, context): # This check is done because KeyTokens can be found inside flow # sequences... strange, but allowed. if len(context['stack']) > 0 and context['stack'][-1].type == MAP: - if any(next.value < key for key in context['stack'][-1].keys): + if any(strcoll(next.value, key) < 0 + for key in context['stack'][-1].keys): yield LintProblem( next.start_mark.line + 1, next.start_mark.column + 1, 'wrong ordering of key "%s" in mapping' % next.value)