linter: Fix UnicodeError when parsing comments

Also add tests for reading non-ASCII strings and comments (both from
Python strings and from files).

Fixes: #10
This commit is contained in:
Adrien Vergé 2016-06-28 09:58:23 +02:00
parent 63dd8313f8
commit c8ba8f7e99
4 changed files with 43 additions and 3 deletions

View File

@@ -64,6 +64,15 @@ class CommandLineTestCase(unittest.TestCase):
f.write('---\n'
'key: value\n')
# non-ASCII chars
os.mkdir(os.path.join(self.wd, 'non-ascii'))
with open(os.path.join(self.wd, 'non-ascii', 'utf-8'), 'wb') as f:
f.write((u'---\n'
u'- hétérogénéité\n'
u'# 19.99 €\n'
u'- お早う御座います。\n'
u'# الأَبْجَدِيَّة العَرَبِيَّة\n').encode('utf-8'))
def tearDown(self):
shutil.rmtree(self.wd)
@@ -261,6 +270,19 @@ class CommandLineTestCase(unittest.TestCase):
self.assertEqual(out, '')
self.assertEqual(err, '')
def test_run_non_ascii_file(self):
file = os.path.join(self.wd, 'non-ascii', 'utf-8')
sys.stdout, sys.stderr = StringIO(), StringIO()
with self.assertRaises(SystemExit) as ctx:
cli.run(('-f', 'parsable', file))
self.assertEqual(ctx.exception.code, 0)
out, err = sys.stdout.getvalue(), sys.stderr.getvalue()
self.assertEqual(out, '')
self.assertEqual(err, '')
def test_run_multiple_files(self):
items = [os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 's')]

View File

@@ -44,3 +44,15 @@ class LinterTestCase(unittest.TestCase):
def test_run_on_list(self):
self.assertRaises(TypeError, linter.run,
['h', 'e', 'l', 'l', 'o'], self.fake_config())
def test_run_on_non_ascii_chars(self):
s = (u'- hétérogénéité\n'
u'# 19.99 €\n')
linter.run(s, self.fake_config())
linter.run(s.encode('utf-8'), self.fake_config())
linter.run(s.encode('iso-8859-15'), self.fake_config())
s = (u'- お早う御座います。\n'
u'# الأَبْجَدِيَّة العَرَبِيَّة\n')
linter.run(s, self.fake_config())
linter.run(s.encode('utf-8'), self.fake_config())

View File

@@ -71,7 +71,10 @@ def get_costemic_problems(buffer, conf):
self.all_rules = set([r.ID for r in rules])
def process_comment(self, comment):
comment = repr(comment)
try:
comment = str(comment)
except UnicodeError:
return # this certainly wasn't a yamllint directive comment
if re.match(r'^# yamllint disable( rule:\S+)*\s*$', comment):
rules = [item[5:] for item in comment[18:].split(' ')][1:]
@@ -95,7 +98,10 @@ def get_costemic_problems(buffer, conf):
class DisableLineDirective(DisableDirective):
def process_comment(self, comment):
comment = repr(comment)
try:
comment = str(comment)
except UnicodeError:
return # this certainly wasn't a yamllint directive comment
if re.match(r'^# yamllint disable-line( rule:\S+)*\s*$', comment):
rules = [item[5:] for item in comment[23:].split(' ')][1:]

View File

@@ -49,7 +49,7 @@ class Comment(object):
self.token_after = token_after
self.comment_before = comment_before
def __repr__(self):
def __str__(self):
end = self.buffer.find('\n', self.pointer)
if end == -1:
end = self.buffer.find('\0', self.pointer)