diff options
Diffstat (limited to 'Lib/re.py')
| -rw-r--r-- | Lib/re.py | 193 | 
1 files changed, 0 insertions, 193 deletions
@@ -7,9 +7,6 @@ import sys  import string  from pcre import * -[ NORMAL, CHARCLASS, REPLACEMENT ] = range(3) -[ CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER ] = range(9) -  #  # First, the public part of the interface:  # @@ -231,199 +228,9 @@ def escape(pattern):  	result.append(char)      return string.join(result, '') -_idprog = None -def valid_identifier(id): -    global _idprog -    if not _idprog: -	_idprog = compile(r"[a-zA-Z_]\w*$") -    if _idprog.match(id): -	return 1 -    else: -	return 0 -  def compile(pattern, flags=0):      groupindex={}      code=pcre_compile(pattern, flags, groupindex)      return RegexObject(pattern, flags, code, groupindex) -def _expand(m, repl): -    results = [] -    index = 0 -    size = len(repl) -    while index < size: -	found = string.find(repl, '\\', index) -	if found < 0: -	    results.append(repl[index:]) -	    break -	if found > index: -	    results.append(repl[index:found]) -	escape_type, value, index = _expand_escape(repl, found+1, REPLACEMENT) -	if escape_type == CHAR: -	    results.append(value) -	elif escape_type == MEMORY_REFERENCE: -	    r = m.group(value) -	    if r is None: -		raise error, ('group "' + str(value) + '" did not contribute ' -			      'to the match') -	    results.append(m.group(value)) -	else: -	    raise error, "bad escape in replacement" -    return string.join(results, '') - -def _expand_escape(pattern, index, context=NORMAL): -    if index >= len(pattern): -	raise error, 'escape ends too soon' - -    elif pattern[index] == 't': -	return CHAR, chr(9), index + 1 -     -    elif pattern[index] == 'n': -	return CHAR, chr(10), index + 1 -     -    elif pattern[index] == 'v': -	return CHAR, chr(11), index + 1 -     -    elif pattern[index] == 'r': -	return CHAR, chr(13), index + 1 -     -    elif pattern[index] == 'f': -	return CHAR, chr(12), index + 1 -     -    elif pattern[index] == 'a': -	return CHAR, chr(7), index + 1 -     -    elif pattern[index] == 'x': -	# CAUTION: this is the Python rule, not the Perl rule! -	end = index + 1  # Skip over the 'x' character -	while (end < len(pattern)) and (pattern[end] in string.hexdigits): -	    end = end + 1 -	if end == index: -	    raise error, "\\x must be followed by hex digit(s)" -	# let Python evaluate it, so we don't incorrectly 2nd-guess -	# what it's doing (and Python in turn passes it on to sscanf, -	# so that *it* doesn't incorrectly 2nd-guess what C does!) -	char = eval ('"' + pattern[index-1:end] + '"') -#	assert len(char) == 1 -	return CHAR, char, end - -    elif pattern[index] == 'b': -	if context != NORMAL: -	    return CHAR, chr(8), index + 1 -	else: -	    return WORD_BOUNDARY, '', index + 1 -	     -    elif pattern[index] == 'B': -	if context != NORMAL: -	    return CHAR, 'B', index + 1 -	else: -	    return NOT_WORD_BOUNDARY, '', index + 1 -	     -    elif pattern[index] == 'A': -	if context != NORMAL: -	    return CHAR, 'A', index + 1 -	else: -	    return BEGINNING_OF_BUFFER, '', index + 1 -	     -    elif pattern[index] == 'Z': -	if context != NORMAL: -	    return CHAR, 'Z', index + 1 -	else: -	    return END_OF_BUFFER, '', index + 1 -	     -    elif pattern[index] in 'GluLUQE': -	raise error, ('\\' + pattern[index] + ' is not allowed') -     -    elif pattern[index] == 'w': -	    return CHAR, 'w', index + 1 -	 -    elif pattern[index] == 'W': -	    return CHAR, 'W', index + 1 -	 -    elif pattern[index] == 's': -	    return CHAR, 's', index + 1 -	 -    elif pattern[index] == 'S': -	    return CHAR, 'S', index + 1 -	 -    elif pattern[index] == 'd': -	    return CHAR, 'd', index + 1 -	 -    elif pattern[index] == 'D': -	    return CHAR, 'D', index + 1 - -    elif pattern[index] in '0123456789': - -	if pattern[index] == '0': -	    if (index + 1 < len(pattern)) and \ -	       (pattern[index + 1] in string.octdigits): -		if (index + 2 < len(pattern)) and \ -		   (pattern[index + 2] in string.octdigits): -		    value = string.atoi(pattern[index:index + 3], 8) -		    index = index + 3 - -		else: -		    value = string.atoi(pattern[index:index + 2], 8) -		    index = index + 2 - -	    else: -		value = 0 -		index = index + 1 - -	    if value > 255: -		raise error, 'octal value out of range' - -	    return CHAR, chr(value), index -	 -	else: -	    if (index + 1 < len(pattern)) and \ -	       (pattern[index + 1] in string.digits): -		if (index + 2 < len(pattern)) and \ -		   (pattern[index + 2] in string.octdigits) and \ -		   (pattern[index + 1] in string.octdigits) and \ -		   (pattern[index] in string.octdigits): -		    value = string.atoi(pattern[index:index + 3], 8) -		    if value > 255: -			raise error, 'octal value out of range' - -		    return CHAR, chr(value), index + 3 - -		else: -		    value = string.atoi(pattern[index:index + 2]) -		    if (value < 1) or (value > 99): -			raise error, 'memory reference out of range' - -		    if context == CHARCLASS: -			raise error, ('cannot reference a register from ' -				      'inside a character class') -		    return MEMORY_REFERENCE, value, index + 2 - -	    else: -		if context == CHARCLASS: -		    raise error, ('cannot reference a register from ' -				  'inside a character class') - -		value = string.atoi(pattern[index]) -		return MEMORY_REFERENCE, value, index + 1 -	     -    elif pattern[index] == 'g': -	if context != REPLACEMENT: -	    return CHAR, 'g', index + 1 - -	index = index + 1 -	if index >= len(pattern): -	    raise error, 'unfinished symbolic reference' -	if pattern[index] != '<': -	    raise error, 'missing < in symbolic reference' - -	index = index + 1 -	end = string.find(pattern, '>', index) -	if end == -1: -	    raise error, 'unfinished symbolic reference' -	value = pattern[index:end] -	if not valid_identifier(value): -	    raise error, 'illegal symbolic reference' -	return MEMORY_REFERENCE, value, end + 1 -     -    else: -	return CHAR, pattern[index], index + 1  | 
