summary | refs | log | tree | commit | diff | stats
path: root/Lib/sre_parse.py
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 (GMT)
commit: fd036451bf0e0ade8783e21df801abf7be96d020 (patch)
tree: e70ff65a9e641d8e790bc091f0dc2507baf344ca /Lib/sre_parse.py
parent: 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff)
download: cpython-fd036451bf0e0ade8783e21df801abf7be96d020.zip
cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz
cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.bz2
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r-- Lib/sre_parse.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index ffa8902..9d6e631 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -64,6 +64,7 @@ FLAGS = {
"s": SRE_FLAG_DOTALL,
"x": SRE_FLAG_VERBOSE,
# extensions
+ "a": SRE_FLAG_ASCII,
"t": SRE_FLAG_TEMPLATE,
"u": SRE_FLAG_UNICODE,
}
@@ -672,6 +673,18 @@ def _parse(source, state):
return subpattern
+def fix_flags(src, flags):
+ # Check and fix flags according to the type of pattern (str or bytes)
+ if isinstance(src, str):
+ if not flags & SRE_FLAG_ASCII:
+ flags |= SRE_FLAG_UNICODE
+ elif flags & SRE_FLAG_UNICODE:
+ raise ValueError("ASCII and UNICODE flags are incompatible")
+ else:
+ if flags & SRE_FLAG_UNICODE:
+ raise ValueError("can't use UNICODE flag with a bytes pattern")
+ return flags
+
def parse(str, flags=0, pattern=None):
# parse 're' pattern into list of (opcode, argument) tuples
@@ -683,6 +696,7 @@ def parse(str, flags=0, pattern=None):
pattern.str = str
p = _parse_sub(source, pattern, 0)
+ p.pattern.flags = fix_flags(str, p.pattern.flags)
tail = source.get()
if tail == ")":