diff options
author | Guido van Rossum <guido@python.org> | 1997-10-08 02:08:04 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-10-08 02:08:04 (GMT) |
commit | 5bc5b14f6d01e11a8e10c3e937e7e83eff556178 (patch) | |
tree | 58e65e156db3e939856df4ef2faf8f17d52e024a /Lib/re.py | |
parent | c3861078385ee8f546ae9d16d47b2f3447f3044d (diff) | |
download | cpython-5bc5b14f6d01e11a8e10c3e937e7e83eff556178.zip cpython-5bc5b14f6d01e11a8e10c3e937e7e83eff556178.tar.gz cpython-5bc5b14f6d01e11a8e10c3e937e7e83eff556178.tar.bz2 |
Checking in AMK's latest installment.
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- | Lib/re.py | 193 |
1 files changed, 0 insertions, 193 deletions
@@ -7,9 +7,6 @@ import sys import string from pcre import * -[ NORMAL, CHARCLASS, REPLACEMENT ] = range(3) -[ CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER ] = range(9) - # # First, the public part of the interface: # @@ -231,199 +228,9 @@ def escape(pattern): result.append(char) return string.join(result, '') -_idprog = None -def valid_identifier(id): - global _idprog - if not _idprog: - _idprog = compile(r"[a-zA-Z_]\w*$") - if _idprog.match(id): - return 1 - else: - return 0 - def compile(pattern, flags=0): groupindex={} code=pcre_compile(pattern, flags, groupindex) return RegexObject(pattern, flags, code, groupindex) -def _expand(m, repl): - results = [] - index = 0 - size = len(repl) - while index < size: - found = string.find(repl, '\\', index) - if found < 0: - results.append(repl[index:]) - break - if found > index: - results.append(repl[index:found]) - escape_type, value, index = _expand_escape(repl, found+1, REPLACEMENT) - if escape_type == CHAR: - results.append(value) - elif escape_type == MEMORY_REFERENCE: - r = m.group(value) - if r is None: - raise error, ('group "' + str(value) + '" did not contribute ' - 'to the match') - results.append(m.group(value)) - else: - raise error, "bad escape in replacement" - return string.join(results, '') - -def _expand_escape(pattern, index, context=NORMAL): - if index >= len(pattern): - raise error, 'escape ends too soon' - - elif pattern[index] == 't': - return CHAR, chr(9), index + 1 - - elif pattern[index] == 'n': - return CHAR, chr(10), index + 1 - - elif pattern[index] == 'v': - return CHAR, chr(11), index + 1 - - elif pattern[index] == 'r': - return CHAR, chr(13), index + 1 - - elif pattern[index] == 'f': - return CHAR, chr(12), index + 1 - - elif pattern[index] == 'a': - return CHAR, chr(7), index + 1 - - elif pattern[index] == 'x': - # CAUTION: this is the Python rule, not the Perl rule! 
- end = index + 1 # Skip over the 'x' character - while (end < len(pattern)) and (pattern[end] in string.hexdigits): - end = end + 1 - if end == index: - raise error, "\\x must be followed by hex digit(s)" - # let Python evaluate it, so we don't incorrectly 2nd-guess - # what it's doing (and Python in turn passes it on to sscanf, - # so that *it* doesn't incorrectly 2nd-guess what C does!) - char = eval ('"' + pattern[index-1:end] + '"') -# assert len(char) == 1 - return CHAR, char, end - - elif pattern[index] == 'b': - if context != NORMAL: - return CHAR, chr(8), index + 1 - else: - return WORD_BOUNDARY, '', index + 1 - - elif pattern[index] == 'B': - if context != NORMAL: - return CHAR, 'B', index + 1 - else: - return NOT_WORD_BOUNDARY, '', index + 1 - - elif pattern[index] == 'A': - if context != NORMAL: - return CHAR, 'A', index + 1 - else: - return BEGINNING_OF_BUFFER, '', index + 1 - - elif pattern[index] == 'Z': - if context != NORMAL: - return CHAR, 'Z', index + 1 - else: - return END_OF_BUFFER, '', index + 1 - - elif pattern[index] in 'GluLUQE': - raise error, ('\\' + pattern[index] + ' is not allowed') - - elif pattern[index] == 'w': - return CHAR, 'w', index + 1 - - elif pattern[index] == 'W': - return CHAR, 'W', index + 1 - - elif pattern[index] == 's': - return CHAR, 's', index + 1 - - elif pattern[index] == 'S': - return CHAR, 'S', index + 1 - - elif pattern[index] == 'd': - return CHAR, 'd', index + 1 - - elif pattern[index] == 'D': - return CHAR, 'D', index + 1 - - elif pattern[index] in '0123456789': - - if pattern[index] == '0': - if (index + 1 < len(pattern)) and \ - (pattern[index + 1] in string.octdigits): - if (index + 2 < len(pattern)) and \ - (pattern[index + 2] in string.octdigits): - value = string.atoi(pattern[index:index + 3], 8) - index = index + 3 - - else: - value = string.atoi(pattern[index:index + 2], 8) - index = index + 2 - - else: - value = 0 - index = index + 1 - - if value > 255: - raise error, 'octal value out of range' - - 
return CHAR, chr(value), index - - else: - if (index + 1 < len(pattern)) and \ - (pattern[index + 1] in string.digits): - if (index + 2 < len(pattern)) and \ - (pattern[index + 2] in string.octdigits) and \ - (pattern[index + 1] in string.octdigits) and \ - (pattern[index] in string.octdigits): - value = string.atoi(pattern[index:index + 3], 8) - if value > 255: - raise error, 'octal value out of range' - - return CHAR, chr(value), index + 3 - - else: - value = string.atoi(pattern[index:index + 2]) - if (value < 1) or (value > 99): - raise error, 'memory reference out of range' - - if context == CHARCLASS: - raise error, ('cannot reference a register from ' - 'inside a character class') - return MEMORY_REFERENCE, value, index + 2 - - else: - if context == CHARCLASS: - raise error, ('cannot reference a register from ' - 'inside a character class') - - value = string.atoi(pattern[index]) - return MEMORY_REFERENCE, value, index + 1 - - elif pattern[index] == 'g': - if context != REPLACEMENT: - return CHAR, 'g', index + 1 - - index = index + 1 - if index >= len(pattern): - raise error, 'unfinished symbolic reference' - if pattern[index] != '<': - raise error, 'missing < in symbolic reference' - - index = index + 1 - end = string.find(pattern, '>', index) - if end == -1: - raise error, 'unfinished symbolic reference' - value = pattern[index:end] - if not valid_identifier(value): - raise error, 'illegal symbolic reference' - return MEMORY_REFERENCE, value, end + 1 - - else: - return CHAR, pattern[index], index + 1 |