mirror of
https://github.com/sphinx-doc/sphinx.git
synced 2025-02-25 18:55:22 -06:00
98 lines
2.7 KiB
Python
98 lines
2.7 KiB
Python
# -*- coding: utf-8 -*-
"""
    scanner
    ~~~~~~~

    This library implements a regex based scanner.

    :copyright: 2006-2007 by Armin Ronacher, Georg Brandl.
    :license: BSD license.
"""
|
|
import re
|
|
|
|
|
|
class EndOfText(RuntimeError):
    """
    Raised when the end of the text has been reached and the user
    tried to call a match function.
    """
|
|
|
|
|
|
class Scanner(object):
    """
    Simple regex based scanner.

    All method patterns are regular expression strings (not
    compiled expressions!).  Each pattern is compiled on first use
    with the scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        self._re_cache = {}

    # Declared with the decorator so that only the getter is registered;
    # ``property(eos, eos.__doc__)`` would install the docstring as the
    # setter, because the second positional argument of ``property`` is fset.
    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """
        Return the compiled regular expression for `pattern`, compiling
        it with the scanner's default flags and caching it on first use.
        """
        try:
            return self._re_cache[pattern]
        except KeyError:
            regex = re.compile(pattern, self.flags)
            self._re_cache[pattern] = regex
            return regex

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object (or `None` if it does not match).
        Doesn't touch pos.  Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """
        Apply a pattern on the current position and check
        if it matches.  Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields.  The return value is a boolean that
        indicates if the pattern matched.  The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``.  ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        Note that ``last`` is set to the current ``match`` before the
        pattern is tried, i.e. also when the pattern does not match.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m
        return True

    def get_char(self):
        """
        Scan exactly one char.

        :raises EndOfText: if the scanner is already at the end of text
        """
        # The inline DOTALL flag makes "." match a newline as well, so one
        # character is always consumed regardless of the default flags.
        self.scan('(?s).')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )
|