sphinx/utils/check_sources.py

260 lines
7.8 KiB
Python
Raw Normal View History

2007-07-23 04:02:25 -05:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
    Checker for file headers
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Make sure each Python file has a correct file header
    including copyright and license information.

    :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
from __future__ import print_function
2007-07-23 04:02:25 -05:00
2015-09-18 13:32:14 -05:00
import os
import re
import sys
from optparse import OptionParser
2007-07-23 04:02:25 -05:00
from os.path import join, splitext, abspath
# Registry mapping a file suffix (e.g. '.py') to the list of check functions
# registered for it, in registration order.
checkers = {}


def checker(*suffixes, **kwds):
    """Return a decorator that registers a check function for *suffixes*.

    The decorated function is appended to ``checkers[suffix]`` for every
    given suffix, gets an ``only_pkg`` attribute, and is returned unchanged.

    :param suffixes: file extensions (including the dot) to register for.
    :param only_pkg: keyword-only flag stored on the function as
        ``func.only_pkg``; defaults to ``False``.
    :raises TypeError: if any keyword other than ``only_pkg`` is passed
        (a misspelt flag would otherwise be ignored silently).
    """
    only_pkg = kwds.pop('only_pkg', False)
    if kwds:
        raise TypeError('checker() got unexpected keyword argument(s): %s'
                        % ', '.join(sorted(kwds)))

    def deco(func):
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        func.only_pkg = only_pkg
        return func
    return deco
2015-09-18 13:32:14 -05:00
# this one is a byte regex since it is applied before decoding
coding_re = re.compile(br'coding[:=]\s*([-\w.]+)')
# text variant: matches a whole comment line carrying a coding declaration
uni_coding_re = re.compile(r'^#.*coding[:=]\s*([-\w.]+).*')

# A contributor name, optionally followed by an <e-mail> part.
name_mail_re = r'[\w ]+(<.*?>)?'

# The header patterns below anchor on the four-space indentation that the
# file header docstring is expected to use.
copyright_re = re.compile(r'^    :copyright: Copyright 200\d(-20\d\d)? '
                          r'by %s(, %s)*[,.]$' %
                          (name_mail_re, name_mail_re))
# The license line must end with a literal period directly before the newline.
license_re = re.compile(r"    :license: (.*?)\.\n")
# Continuation line of a copyright field spanning several lines.
copyright_2_re = re.compile(r'^    %s(, %s)*[,.]$' %
                            (name_mail_re, name_mail_re))

# "not x in y" / "not x is y" style expressions
not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+')
# equality comparison against a singleton constant inside an "if"
is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b')

misspellings = ["developement", "adress",  # ALLOW-MISSPELLING
                "verificate", "informations"]  # ALLOW-MISSPELLING
2010-05-16 17:44:44 -05:00
2015-09-18 13:32:14 -05:00
def decode_source(fn, lines):
    """Decode the raw byte *lines* of file *fn* into text lines.

    The encoding defaults to ASCII for ``.py`` files and UTF-8 otherwise; a
    coding declaration found by ``coding_re`` in either of the first two
    lines overrides it from that line onwards.

    :param fn: file name, used for the default encoding and in messages.
    :param lines: sequence of byte strings, one per line.
    :returns: list of decoded lines.
    :raises UnicodeError: if a line cannot be decoded with the encoding.
    :raises LookupError: if the declared encoding is not known.
    """
    encoding = 'ascii' if fn.endswith('.py') else 'utf-8'
    decoded_lines = []
    for lno, line in enumerate(lines):
        if lno < 2:
            co = coding_re.search(line)
            if co:
                # the pattern only admits ASCII word characters, '-' and '.'
                encoding = co.group(1).decode('ascii')
        try:
            decoded_lines.append(line.decode(encoding))
        except UnicodeDecodeError as err:
            raise UnicodeError("%s:%d: not decodable: %s\n Line: %r" %
                               (fn, lno+1, err, line))
        except LookupError:
            # name the file and line so the message is attributable when it
            # is printed on its own, like the decode error above
            raise LookupError("%s:%d: unknown encoding: %s" %
                              (fn, lno+1, encoding))
    return decoded_lines
@checker('.py')
def check_syntax(fn, lines):
    """Yield ``(0, message)`` if the source in *lines* does not compile.

    :param fn: file name, passed to ``compile()`` for error messages.
    :param lines: decoded text lines of the file.
    """
    # Blank out coding-declaration comment lines before compiling; the
    # pattern does not consume the newline, so line numbering is preserved.
    lines = [uni_coding_re.sub('', line) for line in lines]
    try:
        # dont_inherit=True: compile the checked file on its own terms
        # instead of with the __future__ features active in this module.
        compile(''.join(lines), fn, "exec", dont_inherit=True)
    except SyntaxError as err:
        yield 0, "not compilable: %s" % err
2007-07-23 04:02:25 -05:00
@checker('.py')
def check_style(fn, lines):
    """Yield ``(lineno, message)`` pairs for simple style problems.

    Reports lines longer than 95 characters (newline excluded) and, on
    lines that are not pure comments, ``==`` comparisons against
    ``None``/``True``/``False`` inside an ``if``.
    """
    for index, text in enumerate(lines):
        lineno = index + 1
        if len(text.rstrip('\n')) > 95:
            yield lineno, "line too long"
        # Comment-only lines are exempt from the remaining checks.
        if not text.strip().startswith('#'):
            # (a check based on not_ix_re is currently disabled here)
            if is_const_re.search(text):
                yield lineno, 'using == None/True/False'
@checker('.py', only_pkg=True)
def check_fileheader(fn, lines):
    """Yield ``(lineno, message)`` pairs for problems in the file header.

    After an optional ``#!/usr/bin/env python`` line the expected layout is:
    a ``# -*- coding: utf-8 -*-`` line, a docstring opening (three double
    quotes, optionally prefixed with ``r``), the module name indented by
    four spaces, an underline of ``~`` of the same length, further
    four-space indented or blank lines, and the closing quotes. The line
    before the closing quotes must match ``license_re``; the line(s) before
    that must form the copyright field.

    :param fn: file name; its path is used to derive the module name.
    :param lines: decoded text lines of the file.
    """
    # line number correction: maps the 0-based index into the (possibly
    # shebang-stripped) list back to a 1-based line number of the file
    c = 1
    if lines[0:1] == ['#!/usr/bin/env python\n']:
        lines = lines[1:]
        c = 2

    llist = []
    docopen = False
    for lno, l in enumerate(lines):
        llist.append(l)
        if lno == 0:
            if l != '# -*- coding: utf-8 -*-\n':
                yield lno+c, "missing coding declaration"
        elif lno == 1:
            if l != '"""\n' and l != 'r"""\n':
                yield lno+c, 'missing docstring begin (""")'
            else:
                docopen = True
        elif docopen:
            if l == '"""\n':
                # end of docstring
                if lno <= 4:
                    yield lno+c, "missing module name in docstring"
                break

            if l != '\n' and l[:4] != '    ':
                yield lno+c, "missing correct docstring indentation"

            if lno == 2:
                # Accept the heading if it equals the dotted module path or
                # any suffix of it obtained by dropping leading components.
                modname = fn[:-3].replace('/', '.').replace('.__init__', '')
                while modname:
                    if l.lower()[4:-1] == modname:
                        break
                    modname = '.'.join(modname.split('.')[1:])
                else:
                    yield lno+c, "wrong module name in docstring heading"
                modnamelen = len(l.strip())
            elif lno == 3:
                if l.strip() != modnamelen * '~':
                    yield (lno+c,
                           "wrong module name underline, should be ~~~...~")
    else:
        # the loop ended without reaching a closing docstring line
        yield 0, "missing end and/or start of docstring..."

    # check for copyright and license fields; llist ends with the last line
    # examined, i.e. the closing quotes when the docstring was terminated
    license_line = llist[-2:-1]
    if not license_line or not license_re.match(license_line[0]):
        yield 0, "no correct license info"

    # walk upwards over continuation lines of a multi-line copyright field
    ci = -3
    copyright_line = llist[ci:ci+1]
    while copyright_line and copyright_2_re.match(copyright_line[0]):
        ci -= 1
        copyright_line = llist[ci:ci+1]
    if not copyright_line or not copyright_re.match(copyright_line[0]):
        yield 0, "no correct copyright info"
@checker('.py', '.html', '.rst')
def check_whitespace_and_spelling(fn, lines):
    """Yield ``(lineno, message)`` pairs for tabs, trailing whitespace and
    words listed in ``misspellings``.

    A line containing ``ALLOW-MISSPELLING`` is exempt from the spelling
    check only.
    """
    for lno, line in enumerate(lines):
        if '\t' in line:
            yield lno+1, "OMG TABS!!!1 "
        # Strip only the newline: slicing off the last character would eat
        # real content on a final line that has no trailing newline.
        content = line.rstrip('\n')
        if content.rstrip(' \t') != content:
            yield lno+1, "trailing whitespace"
        if 'ALLOW-MISSPELLING' in line:
            continue
        for word in misspellings:
            if word in line:
                yield lno+1, '"%s" used' % word
2015-09-18 13:32:14 -05:00
# Tag openings that must not appear in HTML files.
bad_tags = ['<u>', '<s>', '<strike>', '<center>', '<font']


@checker('.html')
def check_xhtml(fn, lines):
    """Yield ``(lineno, message)`` for every entry of ``bad_tags`` that
    occurs in a line, in ``bad_tags`` order within each line.
    """
    for lineno, text in enumerate(lines, 1):
        offending = [tag for tag in bad_tags if tag in text]
        for tag in offending:
            yield lineno, "used " + tag
def main(argv):
    """Walk a directory tree, run the registered checkers and print results.

    :param argv: full argument vector; ``argv[0]`` is skipped. Accepts
        ``-v``/``--verbose``, any number of ``-i``/``--ignore-path`` options
        and at most one path (default ``'.'``).
    :returns: ``1`` if any problem was reported, else ``0``.
    """
    parser = OptionParser(usage='Usage: %prog [-v] [-i ignorepath]* [path]')
    parser.add_option('-v', '--verbose', dest='verbose', default=False,
                      action='store_true')
    parser.add_option('-i', '--ignore-path', dest='ignored_paths',
                      default=[], action='append')
    options, args = parser.parse_args(argv[1:])

    if len(args) == 0:
        path = '.'
    elif len(args) == 1:
        path = args[0]
    else:
        print(args)
        parser.error('No more than one path supported')

    verbose = options.verbose
    ignored_paths = set(abspath(p) for p in options.ignored_paths)

    num = 0
    for root, dirs, files in os.walk(path):
        # prune version-control directories in place so os.walk skips them
        for vcs_dir in ['.svn', '.hg', '.git']:
            if vcs_dir in dirs:
                dirs.remove(vcs_dir)
        if abspath(root) in ignored_paths:
            # do not descend into an ignored directory
            del dirs[:]
            continue
        # checkers flagged only_pkg run only for roots starting './sphinx'
        in_check_pkg = root.startswith('./sphinx')
        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            if abspath(fn) in ignored_paths:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print("Checking %s..." % fn)

            try:
                with open(fn, 'rb') as f:
                    lines = list(f)
            except (IOError, OSError) as err:
                print("%s: cannot open: %s" % (fn, err))
                num += 1
                continue

            try:
                lines = decode_source(fn, lines)
            except Exception as err:
                # any decoding failure counts as one error for this file
                print(err)
                num += 1
                continue

            for check in checkerlist:
                if not in_check_pkg and check.only_pkg:
                    continue
                for lno, msg in check(fn, lines):
                    print("%s:%d: %s" % (fn, lno, msg))
                    num += 1
    if verbose:
        print()
    if num == 0:
        print("No errors found.")
    else:
        print("%d error%s found." % (num, "s" if num > 1 else ""))
    return int(num > 0)


if __name__ == '__main__':
    sys.exit(main(sys.argv))